1 Kindara app and dataset

A de-identified dataset was provided for this study by the app Kindara, which is dedicated to providing menstruating individuals with a platform to digitally track their cycles and associated fertility signs.

# Embed the image stored at this relative path in the rendered document.
knitr::include_graphics("../Figures Tables Media/Media/kindara_screen.PNG")

# Default options for the chunks that follow: echo the source code and cache
# the chunk results.
knitr::opts_chunk$set(echo = TRUE, cache = TRUE)

2 Data preparation

2.1 Load CSV, filter users, save feather files

Transform CSV into feather files

# Convert every raw "Days" file into a feather file and collect, per file, the
# users who logged at least one positive pregnancy test.
# `IO` (folder paths) and `par$n_cores` are expected to be defined earlier.
input_folder = paste0(IO$input_data, "Days/")
output_folder = paste0(IO$tmp_data,"Days_feather_from_csv/")
if(!dir.exists(output_folder)){dir.create(output_folder)}

files = list.files(input_folder)

tic()
cl = makeCluster(par$n_cores, outfile="")
registerDoParallel(cl)

users = foreach(file = files, .combine = rbind, .packages = c("feather","readr","plyr","dplyr")) %dopar%{
  
  # The files are read as tab-separated values; the column types are spelled
  # out so that every file is parsed the same way.
  #days = read.csv(paste0(input_folder,file), stringsAsFactors = FALSE) 
  days = read_tsv(file = paste0(input_folder,file),
                  col_types = cols(
                    id = col_character(),
                    date = col_date(format = "%Y-%m-%d %H:%M:%S"),
                    first_day = col_logical(),
                    conception = col_logical(),
                    temperature = col_double(),
                    temp_time = col_datetime(format = ""),
                    temp_source = col_integer(),
                    questionable_temp = col_logical(),
                    no_fluid = col_logical(),
                    fluid_sticky = col_integer(),
                    fluid_creamy = col_integer(),
                    fluid_eggwhite = col_integer(),
                    fluid_watery = col_integer(),
                    cervix_height = col_integer(),
                    cervix_openness = col_integer(),
                    cervix_firmness = col_integer(),
                    opk = col_integer(),
                    preg_test = col_integer(),
                    ferning = col_skip(),
                    prg_test = col_skip(),
                    menstruation = col_integer(), 
                    spotting = col_logical(),
                    sex = col_integer(),
                    vaginal_sensation = col_skip(), #col_integer(),
                    custom = col_character(),
                    moods = col_character(),
                    symptoms = col_character()
                  ))
  
  # colnames
  colnames(days)[colnames(days) == "id"] = "user_id"
  
  # identifying the users with at least one positive test (preg_test == 1)
  users_with_pos_preg_tests = unique(days$user_id[which(days$preg_test == 1)])
  # rep() keeps both columns the same length, so a file without any positive
  # test yields an empty table instead of an error in data.frame().
  users = data.frame(user_id = users_with_pos_preg_tests,
                     kindara_csv_file = rep(file, length(users_with_pos_preg_tests)))
  
  # formating pregnancy tests: keep the raw code in preg_test_o and recode the
  # value 2 as -1 (the value later counted as a negative test)
  days = mutate(days,
                preg_test_o = preg_test,
                preg_test = ifelse(preg_test == 2, -1, preg_test))
  
  new_file_name = gsub("csv","feather",file)
  write_feather(days, path = paste0(output_folder,new_file_name))
  #save(days, file = paste0(output_folder,new_file_name))
  
  return(users)
}

# The cluster was created explicitly with makeCluster(), so it has to be shut
# down with stopCluster(); stopImplicitCluster() only stops a cluster that
# registerDoParallel() created on its own.
stopCluster(cl)
toc()
## 108.527 sec elapsed
write_feather(users, path = paste0(IO$tmp_data, "full_list_users_with_pos_preg_tests.feather"))

Create a user table from the list of users that ever logged a positive pregnancy test

#users = read_feather(path = paste0(IO$tmp_data, "full_list_users_with_pos_preg_tests.feather"))

# One batch number per source file, numbered in order of first appearance.
# as.numeric() on the file name only yields a number when the column is a
# factor (it then returns the level code); on a character column, which is what
# data.frame() produces by default since R 4.0, it returns NA. match() against
# the unique file names works for both column types.
file_names = as.character(users$kindara_csv_file)
users$batch = as.numeric(match(file_names, unique(file_names)))

# One row per user: all the files the user appears in (comma-separated) and the
# smallest batch number among them.
users_agg = ddply(users,
                  "user_id",
                  summarise,
                  kindara_csv_file = paste0(kindara_csv_file, collapse = ","),
                  batch = min(batch))

users = users_agg
users$pos_preg_test = TRUE

write_feather(users, path = paste0(IO$output_data, "users.feather"))
ok = file.copy(from = paste0(IO$output_data, "users.feather"), to = paste0(IO$tmp_data, "users_with_pos_preg_tests.feather"), overwrite = TRUE)

Filter the days table and re-organize users into batches

# Keep only the rows of the users listed in `users` and write them out again,
# one feather file per (batch, input file) pair.
input_folder = paste0(IO$tmp_data,"Days_feather_from_csv/")
tmp_folder = paste0(IO$tmp_data,"Days_filtered_split_batches/")
if(!dir.exists(tmp_folder)){dir.create(tmp_folder)}

files = list.files(input_folder)

cl = makeCluster(par$n_cores)
registerDoParallel(cl)

ok = foreach(file = files, .packages = "feather") %dopar%{
  
  full_days = read_feather(path = paste0(input_folder,file)) 
  
  # filtering
  full_days = full_days[full_days$user_id %in% users$user_id,]
  # remember which input file each row came from
  full_days$input_file_id = file
  
  # split by batches: one output file per batch present in this input file
  for(b in unique(users$batch[users$user_id %in% full_days$user_id])){
    days = full_days[full_days$user_id %in% users$user_id[users$batch == b],]
    days$batch = b
    write_feather(days, path = paste0(tmp_folder,"batch_",b,"_",file))
  }
  
}
# Explicit clusters must be stopped with stopCluster(); stopImplicitCluster()
# leaves a cluster created by makeCluster() running.
stopCluster(cl)
# Merge the per-(batch, input file) pieces into one de-duplicated days file per
# batch, and record for every user the input files their rows came from.
input_folder = paste0(IO$tmp_data,"Days_filtered_split_batches/")
output_folder = paste0(IO$output_data,"Days/")
tmp_folder = paste0(IO$tmp_data, "Days_filtered/")
# NOTE(review): the output folder is reset only when the *input* folder exists;
# presumably a guard against wiping results that cannot be rebuilt - confirm.
if(dir.exists(input_folder)){unlink(output_folder, recursive = TRUE);dir.create(output_folder)}
if(!dir.exists(tmp_folder)){dir.create(tmp_folder)}

files = list.files(input_folder)

# One cluster serves all batches. It used to be re-created for every batch
# inside the loop and never stopped, leaving a new set of workers per batch.
cl = makeCluster(par$n_cores)
registerDoParallel(cl)

input_files = foreach(b = unique(users$batch), .combine = rbind) %do%{
  
  batch_files = files[grep(paste0("batch_",b,"_day"), files)]
  
  # read all the pieces of this batch in parallel and stack them
  days = foreach(file = batch_files, .combine = rbind, .packages = "feather") %dopar%{
    days = read_feather(path = paste0(input_folder,file))
    return(days)
  }
  
  # checking for duplicated rows
  d = duplicated(days)
  j = which(d)
  if(length(j)>0){
    days = days[-j,]
  }
  
  write_feather(days, path = paste0(output_folder,"days_",b,".feather"))
  file.copy(from = paste0(output_folder,"days_",b,".feather"), to = paste0(tmp_folder,"days_",b,".feather"), overwrite = TRUE)
  
  # per user: the sorted, unique input file ids joined with "|"
  input_files = aggregate(input_file_id ~ user_id, days, function(x){paste0(unique(sort(x)),collapse = "|")})
  return(input_files)
}

# stopCluster() is required for a cluster created with makeCluster().
stopCluster(cl)

save(input_files, file = paste0(IO$tmp_data, "input_files.Rdata"))

2.2 Identify cycles (and pregnancies)

# Flag, for every user, the days on which a cycle starts.
# Each feather file of Days_filtered/ is rewritten with three extra columns:
# day_id, is_first_day and first_day_type.
input_folder <- paste0(IO$tmp_data, "Days_filtered/")
output_folder <- paste0(IO$output_data, "Days/")
tmp_folder <- paste0(IO$tmp_data, "Days_filtered_with_cycles/")
# The output folder is emptied and re-created, but only when the input folder
# is present.
if (dir.exists(input_folder)) {
  unlink(output_folder, recursive = TRUE)
  dir.create(output_folder)
}
if (!dir.exists(tmp_folder)) {
  dir.create(tmp_folder)
}

files <- list.files(input_folder)


cl <- makeCluster(par$n_cores)
registerDoParallel(cl)

foreach(file = files, .packages = c("feather","zoo","plyr","dplyr","tictoc","foreach")) %dopar% {

  days <- read_feather(path = paste0(input_folder, file))
  days <- days[order(days$user_id, days$date), ]
  days$is_first_day <- FALSE
  days$first_day_type <- NA
  days <- mutate(days, day_id = paste0(user_id, "_", date))

  # Collect, user by user, the day_id of every detected cycle start.
  tic()
  day_ids <- foreach(uid = unique(days$user_id), .combine = c) %do% {
    user_days <- days[which(days$user_id == uid), ]
    # number of days elapsed since this user's first logged date
    user_days$day <- as.numeric(user_days$date - min(user_days$date))

    # find_cycle_starts() is defined elsewhere; its result is matched against
    # the `day` column computed above.
    cycle_starts <- find_cycle_starts(this_user_day = user_days, debug = FALSE)

    user_days$day_id[user_days$day %in% cycle_starts]
  }
  toc()

  days$is_first_day[days$day_id %in% day_ids] <- TRUE

  # first_day_type: 0 on each user's first logged date, 1 on a detected cycle
  # start (1 wins when both apply), NA everywhere else.
  first_obs <- aggregate(date ~ user_id, days, min)
  days$first_day_type[days$day_id %in% paste0(first_obs$user_id, "_", first_obs$date)] <- 0
  days$first_day_type[days$is_first_day] <- 1

  write_feather(days, path = paste0(output_folder, file))
  file.copy(from = paste0(output_folder, file), to = paste0(tmp_folder, file), overwrite = TRUE)
}
## [[1]]
## [1] TRUE
## 
## [[2]]
## [1] TRUE
## 
## [[3]]
## [1] TRUE
## 
## [[4]]
## [1] TRUE
## 
## [[5]]
## [1] TRUE
## 
## [[6]]
## [1] TRUE
## 
## [[7]]
## [1] TRUE
## 
## [[8]]
## [1] TRUE
## 
## [[9]]
## [1] TRUE
## 
## [[10]]
## [1] TRUE
## 
## [[11]]
## [1] TRUE
## 
## [[12]]
## [1] TRUE
## 
## [[13]]
## [1] TRUE
## 
## [[14]]
## [1] TRUE
## 
## [[15]]
## [1] TRUE
## 
## [[16]]
## [1] TRUE
## 
## [[17]]
## [1] TRUE
## 
## [[18]]
## [1] TRUE
## 
## [[19]]
## [1] TRUE
## 
## [[20]]
## [1] TRUE
## 
## [[21]]
## [1] TRUE
## 
## [[22]]
## [1] TRUE
## 
## [[23]]
## [1] TRUE
## 
## [[24]]
## [1] TRUE
## 
## [[25]]
## [1] TRUE
## 
## [[26]]
## [1] TRUE
## 
## [[27]]
## [1] TRUE
## 
## [[28]]
## [1] TRUE
## 
## [[29]]
## [1] TRUE
## 
## [[30]]
## [1] TRUE
## 
## [[31]]
## [1] TRUE
## 
## [[32]]
## [1] TRUE
## 
## [[33]]
## [1] TRUE
## 
## [[34]]
## [1] TRUE
## 
## [[35]]
## [1] TRUE
## 
## [[36]]
## [1] TRUE
## 
## [[37]]
## [1] TRUE
## 
## [[38]]
## [1] TRUE
## 
## [[39]]
## [1] TRUE
## 
## [[40]]
## [1] TRUE
## 
## [[41]]
## [1] TRUE
## 
## [[42]]
## [1] TRUE
## 
## [[43]]
## [1] TRUE
## 
## [[44]]
## [1] TRUE
## 
## [[45]]
## [1] TRUE
## 
## [[46]]
## [1] TRUE
## 
## [[47]]
## [1] TRUE
## 
## [[48]]
## [1] TRUE
## 
## [[49]]
## [1] TRUE
## 
## [[50]]
## [1] TRUE
## 
## [[51]]
## [1] TRUE
## 
## [[52]]
## [1] TRUE
## 
## [[53]]
## [1] TRUE
## 
## [[54]]
## [1] TRUE
## 
## [[55]]
## [1] TRUE
## 
## [[56]]
## [1] TRUE
## 
## [[57]]
## [1] TRUE
## 
## [[58]]
## [1] TRUE
## 
## [[59]]
## [1] TRUE
## 
## [[60]]
## [1] TRUE
## 
## [[61]]
## [1] TRUE
## 
## [[62]]
## [1] TRUE
## 
## [[63]]
## [1] TRUE
## 
## [[64]]
## [1] TRUE
# `cl` was created explicitly with makeCluster(), so it is shut down with
# stopCluster(); stopImplicitCluster() would leave its workers running.
stopCluster(cl)

2.3 Create a cycles table

We cannot use the cycles table that Kindara provided because we re-defined the cycles. We thus create the cycles from the days table by looking at which days have the flag is_first_day.

#users = read_feather(paste0(IO$output_data,"users.feather"))

# Build the cycles table: one row per day that carries a first_day_type, i.e.
# per detected cycle start or first logged day of a user.
days_input_folder = paste0(IO$output_data,"Days/")
days_files = list.files(days_input_folder)

cl = makeCluster(par$n_cores)
registerDoParallel(cl)

cycles = foreach(file  = days_files, .combine = rbind, .packages = "feather") %dopar%
{
  days = read_feather(path = paste0(days_input_folder,file))
  
  # creating the cycles table
  cycles = days[!is.na(days$first_day_type), c("user_id","date","first_day_type")]
  colnames(cycles)[which(colnames(cycles) == "date")] = "start_date"
  cycles = cycles[order(cycles$user_id, cycles$start_date),]  
  
  # keep only the users present in the users table
  j = which(cycles$user_id %in% users$user_id)
  cycles = cycles[j,]
  
  return(cycles)
}


# Explicit clusters must be stopped with stopCluster(); stopImplicitCluster()
# does not stop a cluster created by makeCluster().
stopCluster(cl)

dim(cycles)
## [1] 1744191       3
write_feather(cycles, path = paste0(IO$output_data,"cycles.feather"))
file.copy(from = paste0(IO$output_data,"cycles.feather"), to = paste0(IO$tmp_data,"cycles_first_version.feather"), overwrite = TRUE)
## [1] TRUE

We create a unique cycle ID for each row of the cycles table

# Number the cycles of each user, then derive an id, an end date and a length
# for every cycle.
cycles <- cycles[order(cycles$user_id, cycles$start_date), ]

# Running sum of first_day_type within each user.
cycles$cycle_nb <- ave(cycles$first_day_type, cycles$user_id, FUN = cumsum)
cycles$cycle_id <- paste0(cycles$user_id, "_", cycles$cycle_nb)

# A cycle ends the day before the same user's next cycle starts. The match()
# looks for the row whose (cycle_nb - 1) equals this row's cycle_nb; when there
# is no such row the end date, and hence the length, is NA.
cycles$end_date <- with(
  cycles,
  start_date[match(cycle_id, paste0(user_id, "_", cycle_nb - 1))] - 1
)

cycles$cycle_length <- with(cycles, as.numeric(end_date - start_date + 1))


write_feather(cycles, path = paste0(IO$output_data, "cycles.feather"))
file.copy(
  from = paste0(IO$output_data, "cycles.feather"),
  to = paste0(IO$tmp_data, "cycles_with_nb_and_id.feather"),
  overwrite = TRUE
)
## [1] TRUE

And associate each row of the days table with a cycle

# Attach cycle information (cycle_nb, cycle_id, cycle_length, cycleday and
# cycleday_from_end) to every row of the days files.
days_folder = paste0(IO$output_data,"Days/")
days_tmp_folder = paste0(IO$tmp_data,"Days_with_cycle_id/")
if(!dir.exists(days_tmp_folder)){dir.create(days_tmp_folder)}

cl = makeCluster(par$n_cores)
registerDoParallel(cl)

days_files = list.files(days_folder)

ok = foreach(file  = days_files, .packages = "feather") %dopar%
{
  days = read_feather(path = paste0(days_folder,file))
  
  # take the part of cycles that matches with the days users
  j = which(cycles$user_id %in% unique(days$user_id)) #& (!is.na(cycles$cycle_length)))
  cycles_sub = cycles[j,]
  # for unfinished cycles, we will consider a time-window of 3 years = 1095 days after the start of the cycle to capture information about these on-going cycles.
  cycles_sub$cycle_length[which(is.na(cycles_sub$cycle_length))] = 1095
  
  # expand cycles for each day: every cycle row is repeated cycle_length times,
  # then numbered 1..cycle_length and dated from the cycle start
  cycles_sub_exp = as.data.frame(lapply(cycles_sub, rep, cycles_sub$cycle_length))
  cycles_sub_exp$cycleday = ave(rep(1,nrow(cycles_sub_exp)), cycles_sub_exp$cycle_id, FUN =cumsum)
  cycles_sub_exp$date = cycles_sub_exp$start_date + (cycles_sub_exp$cycleday - 1)
  cycles_sub_exp$day_id = paste0(cycles_sub_exp$user_id, "_", cycles_sub_exp$date)
  
  
  # match days and cycles_sub_exp on the user/date key
  days$day_id =  paste0(days$user_id, "_", days$date)
  m = match(days$day_id, cycles_sub_exp$day_id)
  days$cycle_nb = cycles_sub_exp$cycle_nb[m]
  days$cycle_id = cycles_sub_exp$cycle_id[m]
  days$cycle_length = cycles_sub_exp$cycle_length[m]
  days$cycleday = cycles_sub_exp$cycleday[m]
  
  # negative count from the end of the cycle: -1 on the last day of the cycle
  days$cycleday_from_end = days$cycleday - days$cycle_length - 1
  
  write_feather(days, path = paste0(days_folder, file))
  file.copy(from = paste0(days_folder, file), to = paste0(days_tmp_folder, file), overwrite = TRUE)
}

# Explicit clusters must be stopped with stopCluster(); stopImplicitCluster()
# does not stop a cluster created by makeCluster().
stopCluster(cl)

2.4 Aggregated cycles variable

Now we can aggregate the days table to report useful information on the cycles table

  • aggregate to create the cycles table

  • user_id –v
  • cycle_id –v
  • cycle_nb –v
  • cycle_length –v
  • n_days_obs –v
  • day_last_obs [cycleday] –v
  • n_pos_preg_test –v
  • n_neg_preg_test –v
  • day_first_pos_preg_test [cycleday] –v
  • day_last_pos_preg_test [cycleday] –v
  • n_days_obs_after_first_pos_preg_test –v
  • last_preg_test (0, 1, -1) –v
  • preg_test_class (0 = no preg test; 1 = at least one positive preg test ; -1 = only negative preg tests)
  • n_tot_sex –v
  • n_prot_sex –v
  • n_unprot_sex –v
  • n_withdrawal –v
  • n_insemination –v
  • n_BBT –v

# Summarise the days table per cycle: observation counts, pregnancy tests,
# sex logs and temperature logs. The result has one row per cycle_id.
input_days_folder = paste0(IO$tmp_data,"Days_with_cycle_id/")
output_days_folder = paste0(IO$output_data,"Days/")

cl = makeCluster(par$n_cores)
registerDoParallel(cl)

days_files = list.files(input_days_folder)

cycles_agg = foreach(file  = days_files, .combine = rbind, .packages = c('plyr','dplyr','feather')) %dopar%
{
  days = read_feather(path = paste0(input_days_folder,file))
  
  # Test counts use na.rm = TRUE: without it a single day with a missing
  # preg_test turned the whole cycle's count into NA, which the clean-up below
  # then replaced by 0, discarding the tests that had been logged.
  # `lu` is a helper defined elsewhere (presumably the number of unique values
  # - TODO confirm).
  # The day_* columns multiply the day index by the test indicator, so they are
  # 0 when the cycle has no matching test.
  cycles_agg = ddply(days, 
                     .(cycle_id), 
                     .parallel = FALSE,  # FALSE,  # TRUE
                     .fun = summarize,
                     cycle_length = min(cycle_length),
                     n_days_obs = lu(date),
                     last_obs_day = max(cycleday),
                     n_pos_preg_test = sum(preg_test == 1, na.rm = TRUE),
                     n_neg_preg_test = sum(preg_test == -1, na.rm = TRUE),
                     day_from_end_first_pos_preg_test = min(Inf, cycleday_from_end * (preg_test == 1), na.rm = TRUE),
                     day_last_pos_preg_test = max(-Inf, cycleday * (preg_test == 1), na.rm = TRUE),
                     day_last_preg_test  = max(-Inf,cycleday * (preg_test %in%  c(1,-1)), na.rm = TRUE),
                     n_tot_sex = sum(sex > 0, na.rm = TRUE),
                     n_prot_sex = sum(sex == 1, na.rm = TRUE),
                     n_unprot_sex =  sum(sex == 2, na.rm = TRUE),
                     n_withdrawal =  sum(sex == 3, na.rm = TRUE),
                     n_insemination = sum(sex == 4, na.rm = TRUE),
                     n_BBT = sum(!is.na(temperature), na.rm = TRUE))
  
  
  # Convert the (negative) day-from-end of the first positive test back into a
  # cycle day; cycles without a positive test keep NA.
  cycles_agg$day_first_pos_preg_test = NA
  j = which(cycles_agg$day_from_end_first_pos_preg_test < 0)
  cycles_agg$day_first_pos_preg_test[j] = cycles_agg$cycle_length[j] + cycles_agg$day_from_end_first_pos_preg_test[j] + 1
  
  cycles_agg$n_pos_preg_test[is.na(cycles_agg$n_pos_preg_test)] = 0
  cycles_agg$n_neg_preg_test[is.na(cycles_agg$n_neg_preg_test)] = 0
  cycles_agg$day_first_pos_preg_test[is.infinite(cycles_agg$day_first_pos_preg_test)] = 0
  cycles_agg$day_last_pos_preg_test[is.infinite(cycles_agg$day_last_pos_preg_test)] = 0
  
  # n_days_obs_after_first_pos_preg_test
  days$day_first_pos_preg_test = cycles_agg$day_first_pos_preg_test[match(days$cycle_id, cycles_agg$cycle_id)]
  days$after_first_pos_preg_test = (days$day_first_pos_preg_test > 0) & (days$cycleday > days$day_first_pos_preg_test)
  
  cycles_agg2 = aggregate(after_first_pos_preg_test ~ cycle_id, days, sum, na.rm = TRUE )
  cycles_agg$n_days_obs_after_first_pos_preg_test = cycles_agg2$after_first_pos_preg_test[match(cycles_agg$cycle_id, cycles_agg2$cycle_id)]
  
  # last_preg_test: result logged on the day of the last test, 0 if no test
  days$day_last_preg_test = cycles_agg$day_last_preg_test[match(days$cycle_id, cycles_agg$cycle_id)]
  cycles_agg2 = days[which(days$cycleday == days$day_last_preg_test),]
  cycles_agg$last_preg_test = cycles_agg2$preg_test[match(cycles_agg$cycle_id, cycles_agg2$cycle_id)]
  cycles_agg$last_preg_test[is.na(cycles_agg$last_preg_test)]= 0
  
  # preg_test_class
  #cycles_agg$preg_test_class = ifelse(cycles_agg$n_pos_preg_test>0,ifelse(cycles_agg$last_preg_test == 1, "pregnant","pregnancy loss"), ifelse(cycles_agg$n_neg_preg_test>0,"not pregnant", "not tested"))
  cycles_agg$preg_test_class = ifelse(cycles_agg$n_pos_preg_test>0,"pregnant", ifelse(cycles_agg$n_neg_preg_test>0,"not pregnant", "not tested"))
  
  
  return(cycles_agg)
  
}

# Explicit clusters must be stopped with stopCluster(); stopImplicitCluster()
# does not stop a cluster created by makeCluster().
stopCluster(cl)

write_feather(cycles_agg, path = paste0(IO$tmp_data, "cycles_agg.feather"))

# Copy every aggregated column into the cycles table, aligned on cycle_id.
# cycle_id is the key and cycle_length is left as it already is in `cycles`.
# setdiff() replaces the `-which(...)` idiom, which would select nothing if no
# name matched; `[[ ]]` replaces eval(parse(...)) for the dynamic column names.
column_names = setdiff(colnames(cycles_agg), c("cycle_id","cycle_length"))
m = match(cycles$cycle_id, cycles_agg$cycle_id)
for(column  in column_names){
  cycles[[column]] = cycles_agg[[column]][m]
}

write_feather(cycles, path = paste0(IO$output_data,"cycles.feather"))
file.copy(from = paste0(IO$output_data,"cycles.feather"), to = paste0(IO$tmp_data,"cycles_with_agg.feather"), overwrite = TRUE)
## [1] TRUE
# preg_type, set only for cycles classified as "pregnant":
#    1 when the cycle length is known,
#    0 when the cycle length is missing,
#   -1 when the cycle is numbered 0 (this assignment comes last and overrides
#      the two above).
cycles$preg_type <- NA
cycles$preg_type[
  with(cycles, which(preg_test_class == "pregnant" & !is.na(cycle_length)))
] <- 1
cycles$preg_type[
  with(cycles, which(preg_test_class == "pregnant" & is.na(cycle_length)))
] <- 0
cycles$preg_type[
  with(cycles, which(preg_test_class == "pregnant" & cycle_nb == 0))
] <- -1

2.5 Augmenting the user table

2.5.1 From the cycles and days tables

  • with number of cycles before first positive pregnancy test
  • with number of cycles after last positive pregnancy test
  • with length of shortest cycle before first pregnancy test
#load(paste0(IO$tmp_data,"users_with_original_file_id.Rdata"),verbose = TRUE)

# Per-user summary of the cycles table. min()/max() over an empty selection
# warn and return Inf/-Inf, hence suppressWarnings() and the clean-up below.
users_agg = suppressWarnings(
  ddply(cycles, 
        .(user_id), 
        .fun = summarize,
        n_cycles = max(cycle_nb, na.rm = TRUE),
        n_days_obs = sum(n_days_obs, na.rm = TRUE),
        n_pos_cycles = sum(n_pos_preg_test > 0, na.rm = TRUE),
        first_cycle_preg = min(cycle_nb[n_pos_preg_test > 0], na.rm = TRUE),
        last_cycle_preg = max(cycle_nb[n_pos_preg_test > 0], na.rm = TRUE)
  )
)

# users without any positive cycle: first_cycle_preg becomes 0 and
# last_cycle_preg becomes Inf
users_agg$first_cycle_preg[is.infinite(users_agg$first_cycle_preg)] =  0
users_agg$last_cycle_preg[is.infinite(users_agg$last_cycle_preg)] =  Inf


# n_obs_after_last_preg
cycles_tmp = cycles
cycles_tmp$first_cycle_preg = users_agg$first_cycle_preg[match(cycles_tmp$user_id, users_agg$user_id)]
cycles_tmp$last_cycle_preg = users_agg$last_cycle_preg[match(cycles_tmp$user_id, users_agg$user_id)]
users_agg2 = aggregate(n_days_obs ~ user_id, cycles_tmp[cycles_tmp$cycle_nb > cycles_tmp$last_cycle_preg,  ], sum, na.rm = TRUE)

users_agg$n_days_obs_after_last_preg = users_agg2$n_days_obs[match(users_agg$user_id, users_agg2$user_id)]
# Users with no cycle after their last positive one are absent from users_agg2
# and therefore NA here; count them as 0. The test used to look at the column
# `n_days_obs_after_first_preg`, which does not exist, so nothing was replaced.
users_agg$n_days_obs_after_last_preg[is.na(users_agg$n_days_obs_after_last_preg)] = 0

users_agg$n_cycles_after_last_preg = users_agg$n_cycles - users_agg$last_cycle_preg
users_agg$n_cycles_after_last_preg[is.infinite(users_agg$n_cycles_after_last_preg)] = 0

# minimal cycle length before the first positive preg test
users_agg2 = aggregate(cycle_length ~ user_id, 
                       cycles_tmp[cycles_tmp$cycle_nb < cycles_tmp$first_cycle_preg,  ], 
                       min, na.rm = TRUE)

users_agg$shortest_cycle_before_first_pos_preg = users_agg2$cycle_length[match(users_agg$user_id, users_agg2$user_id)]

# adding new columns to the users table (columns already present in `users`
# are left untouched); `[[ ]]` handles the dynamic column names without
# eval(parse(...))
column_names = setdiff(colnames(users_agg), colnames(users))
m = match(users$user_id, users_agg$user_id)
for(column  in column_names){
  users[[column]] = users_agg[[column]][m]
}

write_feather(users, path = paste0(IO$output_data,"users.feather"))
file.copy(from = paste0(IO$output_data,"users.feather"), to = paste0(IO$tmp_data,"users_with_agg.feather"), overwrite = TRUE)
## [1] TRUE
  • aggregate

  • avg, median and sd of cycle_length (cycles without positive pregnancy tests)
  • avg, median and sd of cycle_length (cycles before first positive pregnancy tests)

#load(paste0(IO$tmp_data,"users_with_original_file_id.Rdata"),verbose = TRUE)
cycles_tmp = cycles_tmp[cycles_tmp$user_id %in% users$user_id,]

# Per-user mean, median and standard deviation of the cycle length, computed
# (a) over cycles without a positive pregnancy test and (b) over cycles
# numbered before the first cycle with a positive test. Empty selections
# produce warnings, which are silenced.
users_agg = suppressWarnings(
  ddply(cycles_tmp, 
        .(user_id), 
        .fun = summarize,
        cycle_length_no_preg_avg = mean(cycle_length[n_pos_preg_test == 0], na.rm = TRUE),
        cycle_length_no_preg_median = median(cycle_length[n_pos_preg_test == 0], na.rm = TRUE),
        cycle_length_no_preg_sd = sd(cycle_length[n_pos_preg_test == 0], na.rm = TRUE),
        cycle_length_before_preg_avg = mean(cycle_length[cycle_nb < first_cycle_preg], na.rm = TRUE),
        cycle_length_before_preg_median = median(cycle_length[cycle_nb < first_cycle_preg], na.rm = TRUE),
        cycle_length_before_preg_sd = sd(cycle_length[cycle_nb < first_cycle_preg], na.rm = TRUE))
)


# Copy (and overwrite) every statistic into the users table, aligned on
# user_id; `[[ ]]` handles the dynamic column names without eval(parse(...)).
column_names = setdiff(colnames(users_agg), "user_id")
m = match(users$user_id, users_agg$user_id)
for(column  in column_names){
  users[[column]] = users_agg[[column]][m]
}


write_feather(users, path = paste0(IO$output_data,"users.feather"))
file.copy(from = paste0(IO$output_data,"users.feather"), to = paste0(IO$tmp_data,"users_with_cycle_length_stats.feather"), overwrite = TRUE)
## [1] TRUE
# Per-user number of positive pregnancy tests and first/last logged date,
# computed from the days files, then the app usage duration derived from them.
input_days_folder = paste0(IO$output_data,"Days/")

cl = makeCluster(par$n_cores)
registerDoParallel(cl)

days_files = list.files(input_days_folder)

users_agg = foreach(file  = days_files, .combine = rbind, .packages = c('plyr','dplyr','feather')) %dopar%
{
  days = read_feather(path = paste0(input_days_folder,file))
  
  # .parallel = FALSE: this already runs inside a %dopar% worker, and the code
  # registers no backend there, so TRUE had nothing to parallelise over; FALSE
  # also matches the other ddply call made from a worker in this file.
  # na.rm = TRUE keeps a single missing preg_test from turning a user's count
  # into NA.
  users_agg = ddply(days, 
                    .(user_id), 
                    .parallel = FALSE,
                    .fun = summarize,
                    n_pos_preg_tests = sum(preg_test == 1, na.rm = TRUE),
                    earliest_date = min(date, na.rm = TRUE),
                    latest_date = max(date, na.rm = TRUE))
  return(users_agg)
  
}

# Explicit clusters must be stopped with stopCluster(); stopImplicitCluster()
# does not stop a cluster created by makeCluster().
stopCluster(cl)

write_feather(users_agg, path = paste0(IO$tmp_data, "users_agg_earliest_and_latest_dates.feather"))
# add the columns that `users` does not have yet, aligned on user_id
column_names = setdiff(colnames(users_agg), colnames(users))
m = match(users$user_id, users_agg$user_id)
for(column  in column_names){
  users[[column]] = users_agg[[column]][m]
}

users$app_usage_duration_in_days = as.numeric(users$latest_date - users$earliest_date) 
users$app_usage_duration_in_years = users$app_usage_duration_in_days/365

write_feather(users, path = paste0(IO$output_data,"users.feather"))
file.copy(from = paste0(IO$output_data,"users.feather"), to = paste0(IO$tmp_data,"users_with_earliest_and_latest_date.feather"), overwrite = TRUE)
## [1] TRUE

2.5.2 From the Kindara accounts table

2.5.2.1 Add the age to the users and cycles table

# Read the accounts table and derive each user's birth year and current age.
accounts <- read_tsv(paste0(IO$input_data, "accounts.csv"))
## Parsed with column specification:
## cols(
##   id = col_character(),
##   objective = col_character(),
##   birth_day = col_datetime(format = ""),
##   average_cycle = col_double(),
##   average_luteal = col_double(),
##   average_period = col_double(),
##   avr_follicular_temp = col_logical(),
##   avr_luteal_temp = col_logical()
## )
accounts[["birth_year"]] <- year(accounts[["birth_day"]])
users[["birth_year"]] <- accounts[["birth_year"]][match(users[["user_id"]], accounts[["id"]])]
# Age as of the day the script runs.
users[["age_now"]] <- year(today()) - users[["birth_year"]]
# Ages outside the 15-55 range are set to missing.
users$age_now[which(users$age_now < 15 | users$age_now > 55)] <- NA

write_feather(users, path = paste0(IO$output_data, "users.feather"))
file.copy(
  from = paste0(IO$output_data, "users.feather"),
  to = paste0(IO$tmp_data, "users_with_age.feather"),
  overwrite = TRUE
)
## [1] TRUE
# Age of the user at the start of each cycle; ages outside the 15-55 range are
# set to missing.
cycles[["birth_year"]] <- users[["birth_year"]][match(cycles[["user_id"]], users[["user_id"]])]
cycles[["current_age"]] <- year(cycles[["start_date"]]) - cycles[["birth_year"]]
cycles$current_age[which(cycles$current_age < 15 | cycles$current_age > 55)] <- NA

# Age at first pregnancy: the age recorded on the cycle whose id is built from
# the user id and first_cycle_preg.
users[["age_at_first_pregnancy"]] <- cycles[["current_age"]][
  match(paste0(users[["user_id"]], "_", users[["first_cycle_preg"]]), cycles[["cycle_id"]])
]


write_feather(users, path = paste0(IO$output_data, "users.feather"))
file.copy(
  from = paste0(IO$output_data, "users.feather"),
  to = paste0(IO$tmp_data, "users_with_age_at_first_pregnancy.feather"),
  overwrite = TRUE
)
## [1] TRUE
write_feather(cycles, path = paste0(IO$output_data, "cycles.feather"))
file.copy(
  from = paste0(IO$output_data, "cycles.feather"),
  to = paste0(IO$tmp_data, "cycles_with_age.feather"),
  overwrite = TRUE
)
## [1] TRUE

2.5.2.2 Add the reproductive objectives declared by users to the users table

# Copy the objective declared in the accounts table to the users table, and
# from there to every cycle of the user.
accounts <- read_tsv(paste0(IO$input_data, "accounts.csv"))
## Parsed with column specification:
## cols(
##   id = col_character(),
##   objective = col_character(),
##   birth_day = col_datetime(format = ""),
##   average_cycle = col_double(),
##   average_luteal = col_double(),
##   average_period = col_double(),
##   avr_follicular_temp = col_logical(),
##   avr_luteal_temp = col_logical()
## )
users[["reprod_obj_app"]] <- accounts[["objective"]][match(users[["user_id"]], accounts[["id"]])]
cycles[["reprod_obj_app"]] <- users[["reprod_obj_app"]][match(cycles[["user_id"]], users[["user_id"]])]

write_feather(users, path = paste0(IO$output_data, "users.feather"))
file.copy(
  from = paste0(IO$output_data, "users.feather"),
  to = paste0(IO$tmp_data, "users_with_reprod_obj_app.feather"),
  overwrite = TRUE
)
## [1] TRUE
write_feather(cycles, path = paste0(IO$output_data, "cycles.feather"))
file.copy(
  from = paste0(IO$output_data, "cycles.feather"),
  to = paste0(IO$tmp_data, "cycles_with_reprod_obj_app.feather"),
  overwrite = TRUE
)
## [1] TRUE

2.6 Creating a pregnancy table

# The pregnancies table holds one row per cycle classified as "pregnant".
pregnancies <- select(
  cycles[which(cycles$preg_test_class == "pregnant"), ],
  user_id, cycle_id, cycle_nb, cycle_length, preg_type, current_age, reprod_obj_app
)

# The cycle id doubles as pregnancy id and the cycle length as duration.
names(pregnancies)[names(pregnancies) == "cycle_id"] <- "pregnancy_id"
names(pregnancies)[names(pregnancies) == "cycle_length"] <- "preg_duration"
# No duration is kept for cycles numbered 0.
pregnancies$preg_duration[which(pregnancies$cycle_nb == 0)] <- NA


write_feather(pregnancies, path = paste0(IO$output_data, "pregnancies.feather"))
file.copy(
  from = paste0(IO$output_data, "pregnancies.feather"),
  to = paste0(IO$tmp_data, "pregnancies_1st_version.feather"),
  overwrite = TRUE
)
## [1] TRUE

2.6.1 Users types

# A user's type is the largest preg_type found among their pregnancies.
users_agg <- aggregate(preg_type ~ user_id, data = pregnancies, FUN = max)

users[["user_type"]] <- users_agg[["preg_type"]][match(users[["user_id"]], users_agg[["user_id"]])]


write_feather(users, path = paste0(IO$output_data, "users.feather"))
file.copy(
  from = paste0(IO$output_data, "users.feather"),
  to = paste0(IO$tmp_data, "users_with_user_type.feather"),
  overwrite = TRUE
)
## [1] TRUE
# Chunk options for the part of the report that follows.
knitr::opts_chunk$set(echo = TRUE, cache = TRUE)

2.7 Pregnancy outcomes

# Reload the three tables written by the previous steps.
users <- read_feather(path = paste0(IO$output_data, "users.feather"))
cycles <- read_feather(path = paste0(IO$output_data, "cycles.feather"))
pregnancies <- read_feather(path = paste0(IO$output_data, "pregnancies.feather"))

2.7.1 For each pregnancy

# Classify every "pregnant" cycle into an outcome category, first from its
# length alone and then, for the "TB" label, from the length of the next cycle.
# `dict` is defined elsewhere; its labels and cut points are not visible here.

# Start from preg_test_class, with the timeline abbreviations added as levels so
# that they can be assigned below.
cycles$preg_outcome_based_on_duration = factor(cycles$preg_test_class, 
                             levels = c(dict$pregnancy_timeline$abbreviation,
                                        unique(cycles$preg_test_class)))

j = which(cycles$preg_test_class == "pregnant")

# For pregnant cycles, bin the cycle length with the timeline's cut points
# (duration_in_days) and label each bin with the matching abbreviation.
cycles$preg_outcome_based_on_duration[j] = cut(cycles$cycle_length[j], 
                             breaks = c(0,dict$pregnancy_timeline$duration_in_days), 
                             labels = as.character(dict$pregnancy_timeline$abbreviation))



# length of the same user's following cycle (NA when there is none)
cycles$cycle_length_next_cycle = cycles$cycle_length[match(paste0(cycles$user_id, "_",cycles$cycle_nb+1),cycles$cycle_id)]

# Refine the duration-based label. The order matters: the three "TB" rules only
# see the rows still labelled "TB" after the two relabellings above them.
cycles$preg_outcome_cat = as.character(cycles$preg_outcome_based_on_duration)
cycles$preg_outcome_cat[cycles$preg_outcome_based_on_duration == "TB noBF"] = "TB noBF (II)"
cycles$preg_outcome_cat[cycles$preg_outcome_based_on_duration == "BF"] = "TB BF (II)"
# "TB" is split on the next cycle's length, with a threshold of 7*9 = 63 days;
# "TB (III)" is used when the next cycle's length is unknown.
cycles$preg_outcome_cat[which((cycles$preg_outcome_cat == "TB") & (cycles$cycle_length_next_cycle <= 7*9))] = "TB noBF (I)"
cycles$preg_outcome_cat[which((cycles$preg_outcome_cat == "TB") & (cycles$cycle_length_next_cycle > 7*9))] = "TB BF (I)"
cycles$preg_outcome_cat[which((cycles$preg_outcome_cat == "TB") & (is.na(cycles$cycle_length_next_cycle)))] = "TB (III)"
# Map each abbreviation to its category, then turn both columns into factors
# whose levels follow the order of the dictionary.
cycles$preg_outcome = dict$pregnancy_outcomes$categories[match(cycles$preg_outcome_cat, dict$pregnancy_outcomes$abbreviation)]
cycles$preg_outcome_cat = factor(cycles$preg_outcome_cat, levels = unique(dict$pregnancy_outcomes$abbreviation))
cycles$preg_outcome = factor(cycles$preg_outcome, levels = unique(dict$pregnancy_outcomes$categories))

# counts per category among the pregnant cycles
counts = table(cycles$preg_outcome_cat[j])

# Same counts divided by the number of cycles in the listed categories only, so
# the printed proportions need not sum to 1.
table(cycles$preg_outcome_cat[j])/sum(cycles$preg_outcome_cat %in% c("EPL","LPL","ExPTB","PTB","TB noBF (I)","TB noBF (II)","TB (III)","TB BF (I)","TB BF (II)"))
## 
##      FP-VEPL          EPL          LPL        ExPTB          PTB 
##   0.23621122   0.29455264   0.14251393   0.02087274   0.02312188 
##  TB noBF (I) TB noBF (II)     TB (III)    TB BF (I)   TB BF (II) 
##   0.02952853   0.13264837   0.02019118   0.07490330   0.26166743 
##      unclear 
##   0.24907564
# Copy the three outcome columns from the cycles table to the pregnancies
# table, aligned on the pregnancy id (which is the id of the cycle).
m <- match(pregnancies$pregnancy_id, cycles$cycle_id)
pregnancies[["preg_outcome_based_on_duration"]] <- cycles[["preg_outcome_based_on_duration"]][m]
pregnancies[["preg_outcome_cat"]] <- cycles[["preg_outcome_cat"]][m]
pregnancies[["preg_outcome"]] <- cycles[["preg_outcome"]][m]


write_feather(cycles, path = paste0(IO$output_data, "cycles.feather"))
file.copy(
  from = paste0(IO$output_data, "cycles.feather"),
  to = paste0(IO$tmp_data, "cycles_with_preg_outcome.feather"),
  overwrite = TRUE
)
## [1] TRUE
write_feather(pregnancies, path = paste0(IO$output_data, "pregnancies.feather"))
file.copy(
  from = paste0(IO$output_data, "pregnancies.feather"),
  to = paste0(IO$tmp_data, "pregnancies_with_preg_outcome.feather"),
  overwrite = TRUE
)
## [1] TRUE

2.7.2 Aggregates per users

# Per-user counts: cycles classified as pregnant (n_preg), cycles whose outcome
# is one of the two "PL" labels (n_PL) and cycles whose outcome is one of the
# four labels listed for n_LB.
users_preg_outcome <- ddply(
  cycles,
  .(user_id),
  .fun = summarize,
  n_preg = sum(preg_test_class == "pregnant", na.rm = TRUE),
  n_PL = sum(preg_outcome %in% c("EPL","LPL"), na.rm = TRUE),
  n_LB = sum(preg_outcome %in% c("ExPTB","PTB","TB noBF","BF"), na.rm = TRUE)
)

# Diagnostic tables on the per-user outcome counts; the `##` lines are the
# output recorded when the report was rendered.

# cross-tabulation over all users: rows = n_LB, columns = n_PL
table(users_preg_outcome$n_LB, users_preg_outcome$n_PL)
##    
##         0     1     2     3     4     5     6     8
##   0 89300 15169  1569   251    46    10     1     1
##   1 21259  3671   627   108    22     3     0     0
##   2  2620   621   127    16     1     1     0     0
##   3   139    28     8     2     0     0     0     0
##   4     9     1     0     0     0     0     0     0
##   5     0     0     0     0     0     0     0     1
table(users_preg_outcome$n_LB)
## 
##      0      1      2      3      4      5 
## 106347  25690   3386    177     10      1
table(users_preg_outcome$n_PL)
## 
##      0      1      2      3      4      5      6      8 
## 113327  19490   2331    377     69     14      1      2
# same tables, restricted to users with at least one counted outcome
j = which((users_preg_outcome$n_LB + users_preg_outcome$n_PL)>0)
table(users_preg_outcome$n_LB[j], users_preg_outcome$n_PL[j])
##    
##         0     1     2     3     4     5     6     8
##   0     0 15169  1569   251    46    10     1     1
##   1 21259  3671   627   108    22     3     0     0
##   2  2620   621   127    16     1     1     0     0
##   3   139    28     8     2     0     0     0     0
##   4     9     1     0     0     0     0     0     0
##   5     0     0     0     0     0     0     0     1
table(users_preg_outcome$n_LB[j])
## 
##     0     1     2     3     4     5 
## 17047 25690  3386   177    10     1
# percentages, rounded to two decimals
round(table(users_preg_outcome$n_LB[j])/sum(table(users_preg_outcome$n_LB[j])) * 100, 2)
## 
##     0     1     2     3     4     5 
## 36.81 55.47  7.31  0.38  0.02  0.00
table(users_preg_outcome$n_PL[j])
## 
##     0     1     2     3     4     5     6     8 
## 24027 19490  2331   377    69    14     1     2
round(table(users_preg_outcome$n_PL[j])/sum(table(users_preg_outcome$n_PL[j])) * 100, 2)
## 
##     0     1     2     3     4     5     6     8 
## 51.88 42.09  5.03  0.81  0.15  0.03  0.00  0.00
# both tables are expected to have one row per user
dim(users)
## [1] 135611     28
dim(users_preg_outcome)
## [1] 135611      4
# Copy the per-user aggregates into `users`, aligning rows on user_id.
# Every column of `users_preg_outcome` except the user_id key is copied;
# users without a match in `users_preg_outcome` get NA.
colnames = setdiff(colnames(users_preg_outcome), "user_id")
m = match(users$user_id, users_preg_outcome$user_id)
for(colname in colnames){
  # `[[` addresses the column by name directly; no code has to be built from
  # strings and evaluated.
  users[[colname]] = users_preg_outcome[[colname]][m]
}

# Save the users table in the output folder, then keep a copy of that file in
# the temporary folder.
users_feather = paste0(IO$output_data, "users.feather")
write_feather(users, path = users_feather)
file.copy(from = users_feather, to = paste0(IO$tmp_data, "users_with_preg_outcome.feather"), overwrite = TRUE)
## [1] TRUE
# knitr defaults for the chunks that follow: show the code (echo) and cache
# the chunk results (cache).
knitr::opts_chunk$set(echo = TRUE, cache = TRUE)

3 Reproductive objectives of the users

3.1 Data preparation

3.1.1 Loading cycles and users

# Reload the users, cycles and pregnancies tables from the feather files of
# the output folder.
read_output_table = function(table_name){
  read_feather(path = paste0(IO$output_data, table_name, ".feather"))
}
users = read_output_table("users")
cycles = read_output_table("cycles")
pregnancies = read_output_table("pregnancies")

3.1.2 Identifying the 3 cycles preceding a pregnancy

# Label each cycle with a positive pregnancy test (0) and the 3 cycles that
# precede it (-1, -2, -3) in cycle_nb_from_next_preg, and store in
# pregnancy_id the cycle_id of the pregnant cycle they lead to.
cycles$cycle_nb_from_next_preg = NA
cycles$pregnancy_id = NA

# Rows of the cycles classified as "pregnant"
j_preg = which(cycles$preg_test_class == "pregnant")

# Row of the cycle found `lag` cycles before each pregnant cycle of the same
# user (NA when there is none). The lookup assumes that cycle_id is built as
# "<user_id>_<cycle_nb>".
row_before_preg = function(lag){
  match(paste0(cycles$user_id[j_preg],"_",cycles$cycle_nb[j_preg] - lag), cycles$cycle_id)
}
j3 = row_before_preg(3)
j2 = row_before_preg(2)
j1 = row_before_preg(1)
j0 = j_preg

# Assignments go from the furthest to the closest cycle: when a cycle
# qualifies for several labels, the last one written (closest to a pregnancy)
# is the one that remains.
cycles$cycle_nb_from_next_preg[j3] = -3
cycles$cycle_nb_from_next_preg[j2] = -2
cycles$cycle_nb_from_next_preg[j1] = -1
cycles$cycle_nb_from_next_preg[j0] = 0

for(j_prev in list(j3, j2, j1, j0)){
  # only the pregnant cycles for which the earlier cycle exists
  found = !is.na(j_prev)
  cycles$pregnancy_id[j_prev[found]] = cycles$cycle_id[j_preg[found]]
}

3.2 based on what they reported in the app at on-boarding

# Histogram of preg_duration (bins of width 7, x-axis limited to 0-1000),
# one panel per value of reprod_obj_app, each panel with its own axis scales.
# The facet_grid argument is spelled out in full (`scales`) rather than
# relying on partial argument matching.
ggplot(pregnancies, aes(x = preg_duration, fill = reprod_obj_app) )+
  geom_histogram(position = "identity",binwidth = 7, alpha = 1) +
  facet_grid(reprod_obj_app ~ . , scales = "free")+
  xlim(0,1000)
## Warning: Removed 93929 rows containing non-finite values (stat_bin).
## Warning: Removed 10 rows containing missing values (geom_bar).

# Number of pregnancies per preg_outcome, one panel per value of
# reprod_obj_app, each panel with its own axis scales.
# The facet_grid argument is spelled out in full (`scales`) rather than
# relying on partial argument matching.
ggplot(pregnancies, aes(x = preg_outcome, fill = reprod_obj_app) )+
  geom_bar() +
  facet_grid(reprod_obj_app ~ . , scales = "free")

# Pregnancy outcomes (columns) by reprod_obj_app (rows), shown as rounded
# percentages of each row total.
t1 = table(pregnancies$reprod_obj_app, pregnancies$preg_outcome)
round(100 * t1 / rowSums(t1))
##               
##                FP-VEPL EPL LPL ExPTB PTB TB noBF BF unclear
##   avoid_preg        12  15   5     1   2      21 31      12
##   get_preg          23  25  12     2   2       8 15      13
##   other             23  23   8     2   1      15 20       8
##   preg              14  19  10     1   1      11 24      20
##   track_period      13  17   7     1   2      19 28      13

This seems a little counter-intuitive: users who declared wanting to avoid pregnancy have the highest proportion of live births and the lowest proportion of pregnancy losses.

This may be because these labels are not reliable.

# Histogram of n_insemination for the cycles where it is above 0 (x-axis
# limited to 0-20), one panel per value of reprod_obj_app, each panel with
# its own axis scales.
# The facet_grid argument is spelled out in full (`scales`) rather than
# relying on partial argument matching.
ggplot(cycles[cycles$n_insemination > 0,], aes(n_insemination, fill = reprod_obj_app))+
  geom_histogram(position = "identity",binwidth = 1) + xlim(0,20) +
  facet_grid(reprod_obj_app ~ ., scales = "free")
## Warning: Removed 216 rows containing non-finite values (stat_bin).
## Warning: Removed 10 rows containing missing values (geom_bar).

# Number of cycles by reprod_obj_app (rows) and by whether n_insemination is
# above 0 (columns)
table(cycles$reprod_obj_app, cycles$n_insemination > 0)
##               
##                 FALSE   TRUE
##   avoid_preg   150691  10748
##   get_preg     382690  10443
##   other          2844     69
##   preg         964279  27698
##   track_period 187625   7049
# Same table expressed as a proportion of each row total; the table is built
# once and reused.
insemination_by_obj = table(cycles$reprod_obj_app, cycles$n_insemination > 0)
insemination_by_obj / rowSums(insemination_by_obj)
##               
##                     FALSE       TRUE
##   avoid_preg   0.93342377 0.06657623
##   get_preg     0.97343647 0.02656353
##   other        0.97631308 0.02368692
##   preg         0.97207798 0.02792202
##   track_period 0.96379075 0.03620925
# Histogram of n_prot_sex for the cycles where it is above 0 (x-axis limited
# to 0-20), one panel per value of reprod_obj_app, each panel with its own
# axis scales.
# The facet_grid argument is spelled out in full (`scales`) rather than
# relying on partial argument matching.
ggplot(cycles[cycles$n_prot_sex > 0,], aes(n_prot_sex, fill = reprod_obj_app))+
  geom_histogram(position = "identity",binwidth = 1) + xlim(0,20) +
  facet_grid(reprod_obj_app ~ ., scales = "free")
## Warning: Removed 560 rows containing non-finite values (stat_bin).

## Warning: Removed 10 rows containing missing values (geom_bar).

A lot of the users who declared wanting to avoid pregnancy are actually logging inseminations; similarly, many of those who declared wanting to achieve pregnancy log protected sexual intercourse.

It may thus be relevant to attempt to guess the reproductive objectives based on the sexual behavior of the users.

However, sexual intercourse is not logged in many cycles, including those in which positive pregnancy tests are logged.

# Proportion of cycles for which n_tot_sex is 0 (TRUE) or not (FALSE)
no_sex_logged = table(cycles$n_tot_sex == 0)
no_sex_logged / sum(no_sex_logged)
## 
##     FALSE      TRUE 
## 0.5339027 0.4660973
# Number of cycles by preg_test_class (rows) and by whether n_tot_sex is 0
# (columns)
t2 = table(cycles$preg_test_class, cycles$n_tot_sex == 0)
t2
##               
##                 FALSE   TRUE
##   not pregnant 126983  12639
##   not tested   661325 766263
##   pregnant     142891  34035
# t2 expressed as a proportion of each row total
t2 / rowSums(t2)
##               
##                     FALSE       TRUE
##   not pregnant 0.90947702 0.09052298
##   not tested   0.46324640 0.53675360
##   pregnant     0.80763144 0.19236856

We can nonetheless attempt to estimate the reproductive objectives of the users who do log sexual intercourse.

3.3 based on their sexual behavior in the 3 previous cycles

We look at the overlap between their unprotected sexual intercourse and their fertility window in the 3 cycles preceding a pregnancy.

3.3.1 Fertile window: counting backward

We can define the fertile window by counting backward from the end of each cycle, assuming an identical luteal phase for every cycle and every user.

# Annotate the days of the 3 cycles preceding a pregnancy with a fertility
# score looked up from the day's position counted from the end of the cycle.
# Each feather file of `input_days_folder` is read, restricted to those
# cycles, annotated, and written under the same file name to `tmp_folder`.
input_days_folder = paste0(IO$output_data,"Days/")
tmp_folder = paste0(IO$tmp_data, "Days_with_fertility/")
if(!dir.exists(tmp_folder)){dir.create(tmp_folder)}

cl = makeCluster(par$n_cores)
registerDoParallel(cl)
# NOTE(review): the cluster is not stopped within this span; confirm that it
# is stopped once the loop below has completed.

days_files = list.files(input_days_folder)

# Ids of the cycles located 1 to 3 cycles before a pregnant cycle. This does
# not depend on the file being processed, so it is computed once here and the
# loop body only refers to this vector rather than to the whole `cycles` table.
cycle_ids_before_preg = cycles$cycle_id[which(cycles$cycle_nb_from_next_preg %in% -3:-1)]

# The loop is run for the files it writes. Each iteration returns NULL so
# that the days tables are neither sent back by the workers nor printed.
invisible(
  foreach(file  = days_files, .packages = c('plyr','dplyr','feather')) %dopar%
  {
    days = read_feather(path = paste0(input_days_folder,file))
    days = days[days$cycle_id %in% cycle_ids_before_preg,]
    
    # fertility score of each day, looked up in the dictionary from
    # cycleday_from_end; days without an entry in the dictionary get 0
    days$fertility_counting = dict$fertility_counting$fertility[
      match(days$cycleday_from_end, dict$fertility_counting$cycleday_from_end)]
    days$fertility_counting[is.na(days$fertility_counting)] = 0
    
    # mean score of the days of each cycle, repeated on every day of the cycle
    if(nrow(days) > 0){
      cycles_agg = aggregate(fertility_counting ~ cycle_id, days, mean, na.rm = TRUE)
      days_mean_fertility = cycles_agg$fertility_counting[match(days$cycle_id, cycles_agg$cycle_id)]
    }else{
      # aggregate() stops with an error when there is no row to aggregate
      days_mean_fertility = numeric(0)
    }
    
    # score centered on the mean of its cycle
    days$fertility_counting_n = days$fertility_counting - days_mean_fertility
    
    write_feather(days, path = paste0(tmp_folder,file))
    NULL
  }
)
## [[1]]
## # A tibble: 105,724 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 001614… 2016-02-03 TRUE      FALSE               NA NA                 
##  2 001614… 2016-02-04 FALSE     FALSE               NA NA                 
##  3 001614… 2016-02-05 FALSE     FALSE               NA NA                 
##  4 001614… 2016-02-06 FALSE     FALSE               NA NA                 
##  5 001614… 2016-02-09 FALSE     FALSE               NA NA                 
##  6 001614… 2016-02-11 FALSE     FALSE               NA NA                 
##  7 001614… 2016-02-12 FALSE     FALSE               NA NA                 
##  8 001614… 2016-02-24 FALSE     FALSE               NA NA                 
##  9 001614… 2016-02-25 FALSE     FALSE               NA NA                 
## 10 001614… 2016-02-26 FALSE     FALSE               NA NA                 
## # … with 105,714 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[2]]
## # A tibble: 115,409 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 001c54… 2016-04-05 TRUE      FALSE               NA NA                 
##  2 001c54… 2016-04-06 FALSE     FALSE               NA NA                 
##  3 001c54… 2016-04-07 FALSE     FALSE               NA NA                 
##  4 001c54… 2016-04-08 FALSE     FALSE               NA NA                 
##  5 001c54… 2016-04-09 FALSE     FALSE               NA NA                 
##  6 001c54… 2016-04-18 FALSE     FALSE               NA NA                 
##  7 001c54… 2016-04-21 FALSE     FALSE               NA NA                 
##  8 001c54… 2016-04-24 FALSE     FALSE               NA NA                 
##  9 001d8f… 2016-09-08 TRUE      FALSE               NA NA                 
## 10 001d8f… 2016-09-09 FALSE     FALSE               NA NA                 
## # … with 115,399 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[3]]
## # A tibble: 38,581 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 0060b4… 2018-07-24 TRUE      FALSE               NA NA                 
##  2 0060b4… 2018-08-01 FALSE     FALSE               NA NA                 
##  3 0060b4… 2018-08-20 TRUE      FALSE               NA NA                 
##  4 0060b4… 2018-08-21 FALSE     FALSE               NA NA                 
##  5 0060b4… 2018-09-15 TRUE      FALSE               NA NA                 
##  6 0060b4… 2018-09-16 FALSE     FALSE               NA NA                 
##  7 0060b4… 2018-10-01 FALSE     FALSE               NA NA                 
##  8 0060b4… 2018-10-11 FALSE     FALSE               NA NA                 
##  9 007b72… 2015-11-12 TRUE      FALSE               NA NA                 
## 10 007b72… 2015-12-05 TRUE      FALSE               NA NA                 
## # … with 38,571 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[4]]
## # A tibble: 109,479 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 0019de… 2015-07-05 TRUE      FALSE               NA NA                 
##  2 0019de… 2015-07-06 FALSE     FALSE               NA NA                 
##  3 0019de… 2015-07-07 FALSE     FALSE               NA NA                 
##  4 0019de… 2015-07-08 FALSE     FALSE               NA NA                 
##  5 0019de… 2015-07-09 FALSE     FALSE               NA NA                 
##  6 0019de… 2015-07-31 TRUE      FALSE               NA NA                 
##  7 0019de… 2015-08-01 FALSE     FALSE               NA NA                 
##  8 0019de… 2015-08-02 FALSE     FALSE               NA NA                 
##  9 0019de… 2015-08-03 FALSE     FALSE               NA NA                 
## 10 0019de… 2015-08-04 FALSE     FALSE               NA NA                 
## # … with 109,469 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[5]]
## # A tibble: 95,992 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 001944… 2017-04-10 TRUE      FALSE               NA NA                 
##  2 001944… 2017-04-11 FALSE     FALSE               NA NA                 
##  3 001944… 2017-04-12 FALSE     FALSE               NA NA                 
##  4 001944… 2017-04-13 FALSE     FALSE               NA NA                 
##  5 001944… 2017-04-14 FALSE     FALSE               NA NA                 
##  6 001944… 2017-04-15 FALSE     FALSE               NA NA                 
##  7 001944… 2017-04-18 FALSE     FALSE               NA NA                 
##  8 001944… 2017-04-22 FALSE     FALSE               NA NA                 
##  9 001944… 2017-04-23 FALSE     FALSE               NA NA                 
## 10 001944… 2017-04-29 FALSE     FALSE               NA NA                 
## # … with 95,982 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[6]]
## # A tibble: 115,974 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 000c7f… 2015-07-20 TRUE      FALSE               NA NA                 
##  2 000c7f… 2015-07-21 FALSE     FALSE               NA NA                 
##  3 000c7f… 2015-07-22 FALSE     FALSE               NA NA                 
##  4 000c7f… 2015-08-06 FALSE     FALSE               NA NA                 
##  5 000c7f… 2015-08-11 FALSE     FALSE               NA NA                 
##  6 000c7f… 2015-08-20 FALSE     FALSE               NA NA                 
##  7 000c7f… 2015-08-21 TRUE      FALSE               NA NA                 
##  8 000c7f… 2015-08-25 FALSE     FALSE               NA NA                 
##  9 000c7f… 2015-08-31 FALSE     FALSE               NA NA                 
## 10 000c7f… 2015-09-05 FALSE     FALSE               NA NA                 
## # … with 115,964 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[7]]
## # A tibble: 69,633 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 00080f… 2016-09-09 TRUE      FALSE               NA NA                 
##  2 00080f… 2016-09-10 FALSE     FALSE               NA NA                 
##  3 00080f… 2016-09-11 FALSE     FALSE               NA NA                 
##  4 00080f… 2016-09-12 FALSE     FALSE               NA NA                 
##  5 00080f… 2016-09-13 FALSE     FALSE               NA NA                 
##  6 00080f… 2016-10-11 TRUE      FALSE               NA NA                 
##  7 00080f… 2016-10-12 FALSE     FALSE               NA NA                 
##  8 00080f… 2016-10-13 FALSE     FALSE               NA NA                 
##  9 00080f… 2016-10-14 FALSE     FALSE               NA NA                 
## 10 00080f… 2016-10-15 FALSE     FALSE               NA NA                 
## # … with 69,623 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[8]]
## # A tibble: 94,886 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 00600b… 2016-12-26 TRUE      FALSE               NA NA                 
##  2 00600b… 2017-01-22 TRUE      TRUE                NA NA                 
##  3 00600b… 2017-01-23 FALSE     FALSE               NA NA                 
##  4 00600b… 2017-01-24 FALSE     FALSE               NA NA                 
##  5 00600b… 2017-01-25 FALSE     FALSE               NA NA                 
##  6 00600b… 2017-01-26 FALSE     FALSE               NA NA                 
##  7 00600b… 2017-01-27 FALSE     FALSE               NA NA                 
##  8 00600b… 2017-01-28 FALSE     FALSE               NA NA                 
##  9 00600b… 2017-01-29 FALSE     FALSE               NA NA                 
## 10 00600b… 2017-01-30 FALSE     FALSE               NA NA                 
## # … with 94,876 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[9]]
## # A tibble: 124,580 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 001693… 2017-05-14 TRUE      FALSE               NA NA                 
##  2 001693… 2017-05-15 FALSE     FALSE               NA NA                 
##  3 001693… 2017-05-16 FALSE     FALSE               NA NA                 
##  4 001693… 2017-05-17 FALSE     FALSE               NA NA                 
##  5 001693… 2017-05-18 FALSE     FALSE               NA NA                 
##  6 001693… 2017-05-19 FALSE     FALSE               NA NA                 
##  7 001693… 2017-06-03 FALSE     FALSE               NA NA                 
##  8 001693… 2017-06-04 FALSE     FALSE               NA NA                 
##  9 001693… 2017-06-05 FALSE     FALSE               NA NA                 
## 10 001693… 2017-06-06 FALSE     FALSE               NA NA                 
## # … with 124,570 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[10]]
## # A tibble: 32,383 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 007be6… 2014-12-16 TRUE      FALSE               NA NA                 
##  2 007be6… 2014-12-17 FALSE     FALSE               NA NA                 
##  3 007be6… 2014-12-18 FALSE     FALSE               NA NA                 
##  4 007be6… 2014-12-19 FALSE     FALSE               NA NA                 
##  5 00852d… 2019-05-08 TRUE      FALSE               NA NA                 
##  6 00852d… 2019-05-09 FALSE     FALSE               NA NA                 
##  7 00852d… 2019-05-10 FALSE     FALSE               NA NA                 
##  8 00852d… 2019-05-16 FALSE     FALSE               NA NA                 
##  9 00852d… 2019-05-19 FALSE     FALSE               NA NA                 
## 10 00852d… 2019-05-20 FALSE     FALSE               NA NA                 
## # … with 32,373 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[11]]
## # A tibble: 79,257 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 0040d1… 2016-08-29 TRUE      FALSE             NA   NA                 
##  2 0040d1… 2016-08-30 FALSE     FALSE             NA   NA                 
##  3 0040d1… 2016-09-10 FALSE     FALSE             NA   NA                 
##  4 0040d1… 2016-09-11 FALSE     FALSE             NA   NA                 
##  5 0040d1… 2016-09-12 FALSE     FALSE             NA   NA                 
##  6 0040d1… 2016-09-25 TRUE      FALSE             NA   NA                 
##  7 0040d1… 2016-09-26 FALSE     FALSE             97.2 1970-01-01 06:00:00
##  8 0040d1… 2016-09-27 FALSE     FALSE             96.8 1970-01-01 06:00:00
##  9 0040d1… 2016-09-28 FALSE     FALSE             96.9 1970-01-01 06:00:00
## 10 0040d1… 2016-09-29 FALSE     FALSE             97.3 1970-01-01 06:00:00
## # … with 79,247 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[12]]
## # A tibble: 103,150 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 008bac… 2017-01-19 TRUE      FALSE               NA NA                 
##  2 008bac… 2017-01-20 FALSE     FALSE               NA NA                 
##  3 008bac… 2017-01-21 FALSE     FALSE               NA NA                 
##  4 008bac… 2017-02-12 FALSE     FALSE               NA NA                 
##  5 008bac… 2017-02-13 FALSE     FALSE               NA NA                 
##  6 008bac… 2017-02-14 FALSE     FALSE               NA NA                 
##  7 008bac… 2017-02-19 TRUE      FALSE               NA NA                 
##  8 008bac… 2017-02-20 FALSE     FALSE               NA NA                 
##  9 008bac… 2017-02-21 FALSE     FALSE               NA NA                 
## 10 008bac… 2017-02-22 FALSE     FALSE               NA NA                 
## # … with 103,140 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[13]]
## # A tibble: 91,549 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 000169… 2017-07-26 TRUE      FALSE               NA NA                 
##  2 000169… 2017-07-27 FALSE     FALSE               NA NA                 
##  3 000169… 2017-07-28 FALSE     FALSE               NA NA                 
##  4 000169… 2017-07-29 FALSE     FALSE               NA NA                 
##  5 000169… 2017-07-30 FALSE     FALSE               NA NA                 
##  6 000169… 2017-08-21 TRUE      FALSE               NA NA                 
##  7 000169… 2017-08-22 FALSE     FALSE               NA NA                 
##  8 000169… 2017-08-23 FALSE     FALSE               NA NA                 
##  9 000169… 2017-08-24 FALSE     FALSE               NA NA                 
## 10 000169… 2017-08-25 FALSE     FALSE               NA NA                 
## # … with 91,539 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[14]]
## # A tibble: 35,525 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 006fa7… 2015-02-12 TRUE      FALSE             97.7 NA                 
##  2 006fa7… 2015-02-13 FALSE     FALSE             97.9 NA                 
##  3 006fa7… 2015-02-14 FALSE     FALSE             NA   NA                 
##  4 006fa7… 2015-02-15 FALSE     FALSE             97.9 NA                 
##  5 006fa7… 2015-02-16 FALSE     FALSE             NA   NA                 
##  6 006fa7… 2015-02-17 FALSE     FALSE             97.1 NA                 
##  7 006fa7… 2015-02-18 FALSE     FALSE             97.8 NA                 
##  8 006fa7… 2015-02-19 FALSE     FALSE             97.4 NA                 
##  9 006fa7… 2015-02-20 FALSE     FALSE             96.9 NA                 
## 10 006fa7… 2015-02-21 FALSE     FALSE             97.6 NA                 
## # … with 35,515 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[15]]
## # A tibble: 88,893 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 000a01… 2017-03-04 TRUE      FALSE               NA NA                 
##  2 000a01… 2017-03-06 FALSE     FALSE               NA NA                 
##  3 000a01… 2017-04-01 TRUE      FALSE               NA NA                 
##  4 000a01… 2017-04-02 FALSE     FALSE               NA NA                 
##  5 000a01… 2017-04-03 FALSE     FALSE               NA NA                 
##  6 000a01… 2017-04-08 FALSE     FALSE               NA NA                 
##  7 000a01… 2017-04-10 FALSE     FALSE               NA NA                 
##  8 000a01… 2017-04-11 FALSE     FALSE               NA NA                 
##  9 000a01… 2017-04-12 FALSE     FALSE               NA NA                 
## 10 000a01… 2017-04-13 FALSE     FALSE               NA NA                 
## # … with 88,883 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[16]]
## # A tibble: 116,110 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 000c4c… 2013-06-04 TRUE      FALSE               NA NA                 
##  2 000c4c… 2013-06-05 FALSE     FALSE               NA NA                 
##  3 000c4c… 2013-06-06 FALSE     FALSE               NA NA                 
##  4 000c4c… 2013-06-07 FALSE     FALSE               NA NA                 
##  5 000c4c… 2013-06-08 FALSE     FALSE               NA NA                 
##  6 000c4c… 2013-06-09 FALSE     FALSE               NA NA                 
##  7 000c4c… 2013-07-04 TRUE      FALSE               NA NA                 
##  8 000c4c… 2013-07-05 FALSE     FALSE               NA NA                 
##  9 000c4c… 2013-07-06 FALSE     FALSE               NA NA                 
## 10 000c4c… 2013-08-03 FALSE     FALSE               NA NA                 
## # … with 116,100 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[17]]
## # A tibble: 79,250 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 006ded… 2014-08-15 TRUE      FALSE             NA   NA                 
##  2 006ded… 2014-08-16 FALSE     FALSE             NA   NA                 
##  3 006ded… 2014-08-17 FALSE     FALSE             NA   NA                 
##  4 006ded… 2014-08-18 FALSE     FALSE             NA   NA                 
##  5 006ded… 2014-08-19 FALSE     FALSE             NA   NA                 
##  6 006ded… 2014-08-20 FALSE     FALSE             97.0 NA                 
##  7 006ded… 2014-08-24 FALSE     FALSE             NA   NA                 
##  8 006ded… 2014-08-25 FALSE     FALSE             97.1 NA                 
##  9 006ded… 2014-08-27 FALSE     FALSE             NA   NA                 
## 10 006ded… 2014-08-30 FALSE     FALSE             NA   NA                 
## # … with 79,240 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[18]]
## # A tibble: 108,255 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 0005ec… 2016-10-28 TRUE      FALSE             98   1970-01-01 07:46:00
##  2 0005ec… 2016-10-29 FALSE     FALSE             NA   NA                 
##  3 0005ec… 2016-10-30 FALSE     FALSE             NA   NA                 
##  4 0005ec… 2016-10-31 FALSE     FALSE             NA   NA                 
##  5 0005ec… 2016-11-01 FALSE     FALSE             NA   NA                 
##  6 0005ec… 2016-11-02 FALSE     FALSE             NA   NA                 
##  7 0005ec… 2016-11-03 FALSE     FALSE             97.5 1970-01-01 21:41:00
##  8 0005ec… 2016-11-04 FALSE     FALSE             NA   NA                 
##  9 0005ec… 2016-11-05 FALSE     FALSE             NA   NA                 
## 10 0005ec… 2016-11-06 FALSE     FALSE             97.9 1970-01-01 07:09:00
## # … with 108,245 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[19]]
## # A tibble: 114,430 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 0013ca… 2014-12-04 FALSE     FALSE               NA NA                 
##  2 0013ca… 2014-12-05 TRUE      FALSE               NA NA                 
##  3 0013ca… 2014-12-06 FALSE     FALSE               NA NA                 
##  4 0013ca… 2014-12-07 FALSE     FALSE               NA NA                 
##  5 0013ca… 2014-12-08 FALSE     FALSE               NA NA                 
##  6 0013ca… 2014-12-09 FALSE     FALSE               NA NA                 
##  7 0013ca… 2015-01-01 TRUE      FALSE               NA NA                 
##  8 0013ca… 2015-01-02 FALSE     FALSE               NA NA                 
##  9 0013ca… 2015-01-03 FALSE     FALSE               NA NA                 
## 10 0013ca… 2015-01-04 FALSE     FALSE               NA NA                 
## # … with 114,420 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[20]]
## # A tibble: 57,124 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 002a79… 2017-10-12 TRUE      FALSE               NA NA                 
##  2 002a79… 2017-10-13 FALSE     FALSE               NA NA                 
##  3 002a79… 2017-10-14 FALSE     FALSE               NA NA                 
##  4 002a79… 2017-10-15 FALSE     FALSE               NA NA                 
##  5 002a79… 2017-11-10 FALSE     FALSE               NA NA                 
##  6 002a79… 2017-11-11 FALSE     FALSE               NA NA                 
##  7 002a79… 2017-11-12 FALSE     FALSE               NA NA                 
##  8 002a79… 2017-11-13 TRUE      FALSE               NA NA                 
##  9 002a79… 2017-11-14 FALSE     FALSE               NA NA                 
## 10 002a79… 2017-11-15 FALSE     FALSE               NA NA                 
## # … with 57,114 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[21]]
## # A tibble: 97,286 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 00247c… 2015-11-20 TRUE      FALSE               NA NA                 
##  2 00247c… 2015-11-21 FALSE     FALSE               NA NA                 
##  3 00247c… 2015-11-22 FALSE     FALSE               NA NA                 
##  4 00247c… 2015-11-23 FALSE     FALSE               NA NA                 
##  5 00247c… 2015-11-24 FALSE     FALSE               NA NA                 
##  6 00247c… 2015-11-25 FALSE     FALSE               NA NA                 
##  7 00247c… 2015-12-17 TRUE      FALSE               NA NA                 
##  8 00247c… 2015-12-18 FALSE     FALSE               NA NA                 
##  9 00247c… 2015-12-19 FALSE     FALSE               NA NA                 
## 10 00247c… 2015-12-20 FALSE     FALSE               NA NA                 
## # … with 97,276 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[22]]
## # A tibble: 99,770 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 002b08… 2016-05-16 TRUE      FALSE               NA NA                 
##  2 002b08… 2016-05-17 FALSE     FALSE               NA NA                 
##  3 002b08… 2016-05-18 FALSE     FALSE               NA NA                 
##  4 002b08… 2016-05-19 FALSE     FALSE               NA NA                 
##  5 002b08… 2016-05-20 FALSE     FALSE               NA NA                 
##  6 002b08… 2016-05-21 FALSE     FALSE               NA NA                 
##  7 002b08… 2016-05-22 FALSE     FALSE               NA NA                 
##  8 002b08… 2016-05-23 FALSE     FALSE               NA NA                 
##  9 002b08… 2016-05-24 FALSE     FALSE               NA NA                 
## 10 002b08… 2016-05-25 FALSE     FALSE               NA NA                 
## # … with 99,760 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[23]]
## # A tibble: 53,453 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 009cc5… 2016-08-16 TRUE      FALSE               NA NA                 
##  2 009cc5… 2016-08-17 FALSE     FALSE               NA NA                 
##  3 009cc5… 2016-08-18 FALSE     FALSE               NA NA                 
##  4 009cc5… 2016-09-15 TRUE      FALSE               NA NA                 
##  5 009cc5… 2016-09-16 FALSE     FALSE               NA NA                 
##  6 009cc5… 2016-09-17 FALSE     FALSE               NA NA                 
##  7 009cc5… 2016-09-24 FALSE     FALSE               NA NA                 
##  8 009cc5… 2016-10-08 TRUE      TRUE                NA NA                 
##  9 009cc5… 2016-10-09 FALSE     FALSE               NA NA                 
## 10 009cc5… 2016-10-10 FALSE     FALSE               NA NA                 
## # … with 53,443 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[24]]
## # A tibble: 60,992 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 008145… 2017-08-20 TRUE      FALSE               NA NA                 
##  2 008145… 2017-08-21 FALSE     FALSE               NA NA                 
##  3 008145… 2017-08-22 FALSE     FALSE               NA NA                 
##  4 008145… 2017-08-23 FALSE     FALSE               NA NA                 
##  5 008145… 2017-08-24 FALSE     FALSE               NA NA                 
##  6 008145… 2017-08-25 FALSE     FALSE               NA NA                 
##  7 008145… 2017-08-26 FALSE     FALSE               NA NA                 
##  8 008145… 2017-08-28 FALSE     FALSE               NA NA                 
##  9 008145… 2017-09-02 FALSE     FALSE               NA NA                 
## 10 008145… 2017-09-03 FALSE     FALSE               NA NA                 
## # … with 60,982 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[25]]
## # A tibble: 107,842 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 001d3d… 2015-04-29 TRUE      FALSE               NA NA                 
##  2 001d3d… 2015-04-30 FALSE     FALSE               NA NA                 
##  3 001d3d… 2015-05-01 FALSE     FALSE               NA NA                 
##  4 001d3d… 2015-05-02 FALSE     FALSE               NA NA                 
##  5 001d3d… 2015-05-03 FALSE     FALSE               NA NA                 
##  6 001d3d… 2015-05-25 FALSE     FALSE               95 NA                 
##  7 001d3d… 2015-05-29 TRUE      FALSE               NA NA                 
##  8 001d3d… 2015-05-30 FALSE     FALSE               NA NA                 
##  9 001d3d… 2015-05-31 FALSE     FALSE               NA NA                 
## 10 001d3d… 2015-06-01 FALSE     FALSE               NA NA                 
## # … with 107,832 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[26]]
## # A tibble: 96,350 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 000729… 2017-05-21 TRUE      FALSE             97.7 1970-01-01 21:56:00
##  2 000729… 2017-05-22 FALSE     FALSE             97.1 1970-01-01 21:47:00
##  3 000729… 2017-05-23 FALSE     FALSE             97.0 1970-01-01 21:44:00
##  4 000729… 2017-05-24 FALSE     FALSE             97.3 1970-01-01 21:54:00
##  5 000729… 2017-05-25 FALSE     FALSE             97.3 1970-01-01 23:55:00
##  6 000729… 2017-05-26 FALSE     FALSE             96.8 1970-01-01 22:44:00
##  7 000729… 2017-05-27 FALSE     FALSE             96.8 1970-01-01 22:44:00
##  8 000729… 2017-05-28 FALSE     FALSE             96.4 1970-01-01 23:36:00
##  9 000729… 2017-05-29 FALSE     FALSE             97.3 1970-01-01 21:46:00
## 10 000729… 2017-05-30 FALSE     FALSE             96.9 1970-01-01 21:02:00
## # … with 96,340 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[27]]
## # A tibble: 122,162 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 0008c8… 2015-08-01 TRUE      FALSE             NA   NA                 
##  2 000cd8… 2014-07-14 FALSE     FALSE             NA   NA                 
##  3 000cd8… 2014-07-31 FALSE     FALSE             NA   NA                 
##  4 000cd8… 2014-08-01 FALSE     FALSE             NA   NA                 
##  5 000cd8… 2014-08-09 FALSE     FALSE             NA   NA                 
##  6 000cd8… 2014-08-10 FALSE     FALSE             NA   NA                 
##  7 000cd8… 2014-08-11 FALSE     FALSE             NA   NA                 
##  8 000cd8… 2014-11-24 FALSE     FALSE             97.9 NA                 
##  9 000cd8… 2014-11-25 FALSE     FALSE             98.1 NA                 
## 10 000cd8… 2014-11-26 TRUE      FALSE             97.6 NA                 
## # … with 122,152 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[28]]
## # A tibble: 118,587 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 0008a9… 2015-09-28 TRUE      FALSE             NA   NA                 
##  2 0008a9… 2015-09-29 FALSE     FALSE             NA   NA                 
##  3 0008a9… 2015-09-30 FALSE     FALSE             NA   NA                 
##  4 0008a9… 2015-10-01 FALSE     FALSE             NA   NA                 
##  5 0008a9… 2015-10-02 FALSE     FALSE             NA   NA                 
##  6 0008a9… 2015-10-03 FALSE     FALSE             NA   NA                 
##  7 0008a9… 2015-10-18 FALSE     FALSE             NA   NA                 
##  8 0008a9… 2015-10-19 FALSE     FALSE             NA   NA                 
##  9 0008a9… 2015-10-20 FALSE     FALSE             NA   NA                 
## 10 0008a9… 2015-10-22 FALSE     FALSE             97.7 NA                 
## # … with 118,577 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[29]]
## # A tibble: 99,219 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 001338… 2015-09-23 TRUE      FALSE             NA   NA                 
##  2 001338… 2015-09-24 FALSE     FALSE             NA   NA                 
##  3 001338… 2015-09-25 FALSE     FALSE             97.4 NA                 
##  4 001338… 2015-09-26 FALSE     FALSE             97.8 NA                 
##  5 001338… 2015-09-27 FALSE     FALSE             98.1 NA                 
##  6 001338… 2015-09-28 FALSE     FALSE             97.6 NA                 
##  7 001338… 2015-09-29 FALSE     FALSE             97.8 NA                 
##  8 001338… 2015-09-30 FALSE     FALSE             96.9 NA                 
##  9 001338… 2015-10-01 FALSE     FALSE             97.3 NA                 
## 10 001338… 2015-10-02 FALSE     FALSE             97.3 NA                 
## # … with 99,209 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[30]]
## # A tibble: 59,070 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 005b37… 2015-10-28 TRUE      FALSE             NA   NA                 
##  2 005b37… 2015-10-29 FALSE     FALSE             NA   NA                 
##  3 005b37… 2015-10-30 FALSE     FALSE             NA   NA                 
##  4 005b37… 2015-10-31 FALSE     FALSE             NA   NA                 
##  5 005b37… 2015-11-01 FALSE     FALSE             NA   NA                 
##  6 005b37… 2015-11-02 FALSE     FALSE             97.9 NA                 
##  7 005b37… 2015-11-03 FALSE     FALSE             97.7 NA                 
##  8 005b37… 2015-11-25 FALSE     FALSE             NA   NA                 
##  9 005b37… 2015-11-26 TRUE      FALSE             NA   NA                 
## 10 005b37… 2015-11-27 FALSE     FALSE             NA   NA                 
## # … with 59,060 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[31]]
## # A tibble: 30,844 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 01298c… 2015-03-24 TRUE      FALSE               NA NA                 
##  2 01298c… 2015-03-25 FALSE     FALSE               NA NA                 
##  3 01298c… 2015-04-07 FALSE     FALSE               NA NA                 
##  4 01298c… 2015-06-21 TRUE      FALSE               NA NA                 
##  5 01298c… 2015-06-22 FALSE     FALSE               NA NA                 
##  6 01298c… 2015-06-23 FALSE     FALSE               NA NA                 
##  7 01298c… 2015-06-24 FALSE     FALSE               NA NA                 
##  8 01298c… 2015-06-25 FALSE     FALSE               NA NA                 
##  9 01298c… 2015-07-20 TRUE      FALSE               NA NA                 
## 10 01298c… 2015-07-21 FALSE     FALSE               NA NA                 
## # … with 30,834 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[32]]
## # A tibble: 70,250 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 0005d5… 2016-03-10 TRUE      FALSE             NA   NA                 
##  2 0005d5… 2016-03-11 FALSE     FALSE             NA   NA                 
##  3 0005d5… 2016-03-12 FALSE     FALSE             NA   NA                 
##  4 0005d5… 2016-03-13 FALSE     FALSE             NA   NA                 
##  5 0005d5… 2016-03-14 FALSE     FALSE             NA   NA                 
##  6 0005d5… 2016-03-16 FALSE     FALSE             98.1 NA                 
##  7 0005d5… 2016-03-17 FALSE     FALSE             96.8 NA                 
##  8 000c67… 2018-04-22 TRUE      FALSE             NA   NA                 
##  9 000c67… 2018-04-23 FALSE     FALSE             NA   NA                 
## 10 000c67… 2018-04-24 FALSE     FALSE             NA   NA                 
## # … with 70,240 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[33]]
## # A tibble: 44,844 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 006faf… 2016-04-14 TRUE      FALSE               NA NA                 
##  2 006faf… 2016-04-15 FALSE     FALSE               NA NA                 
##  3 006faf… 2016-04-16 FALSE     FALSE               NA NA                 
##  4 006faf… 2016-04-17 FALSE     FALSE               NA NA                 
##  5 006faf… 2016-04-20 FALSE     FALSE               NA NA                 
##  6 006faf… 2016-04-26 FALSE     FALSE               NA NA                 
##  7 006faf… 2016-04-28 FALSE     FALSE               NA NA                 
##  8 006faf… 2016-04-29 FALSE     FALSE               NA NA                 
##  9 006faf… 2016-05-01 FALSE     FALSE               NA NA                 
## 10 006faf… 2016-05-02 FALSE     FALSE               NA NA                 
## # … with 44,834 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[34]]
## # A tibble: 109,539 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 000d3a… 2019-04-19 TRUE      FALSE             NA   NA                 
##  2 000d3a… 2019-04-30 FALSE     FALSE             NA   NA                 
##  3 000d3a… 2019-05-15 TRUE      FALSE             NA   NA                 
##  4 000d3a… 2019-05-25 FALSE     FALSE             NA   NA                 
##  5 000d3a… 2019-06-10 TRUE      FALSE             NA   NA                 
##  6 000d3a… 2019-06-11 FALSE     FALSE             NA   NA                 
##  7 000d3a… 2019-06-12 FALSE     FALSE             NA   NA                 
##  8 000d3a… 2019-06-13 FALSE     FALSE             NA   NA                 
##  9 0020a7… 2016-09-13 TRUE      FALSE             97.5 1970-01-01 07:54:00
## 10 0020a7… 2016-09-14 FALSE     FALSE             97.5 1970-01-01 07:21:00
## # … with 109,529 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[35]]
## # A tibble: 59,929 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 00336d… 2016-11-06 TRUE      FALSE               NA NA                 
##  2 00336d… 2016-11-07 FALSE     FALSE               NA NA                 
##  3 00336d… 2016-11-08 FALSE     FALSE               NA NA                 
##  4 00336d… 2016-11-09 FALSE     FALSE               NA NA                 
##  5 00336d… 2016-11-10 FALSE     FALSE               NA NA                 
##  6 00336d… 2016-11-30 TRUE      FALSE               NA NA                 
##  7 00336d… 2016-12-01 FALSE     FALSE               NA NA                 
##  8 00336d… 2016-12-02 FALSE     FALSE               NA NA                 
##  9 00336d… 2016-12-03 FALSE     FALSE               NA NA                 
## 10 00336d… 2016-12-04 FALSE     FALSE               NA NA                 
## # … with 59,919 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[36]]
## # A tibble: 91,435 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 000255… 2014-02-05 TRUE      FALSE             96   NA                 
##  2 000255… 2014-02-06 FALSE     FALSE             96.5 NA                 
##  3 000255… 2014-02-07 FALSE     FALSE             NA   NA                 
##  4 000255… 2014-02-08 FALSE     FALSE             NA   NA                 
##  5 000255… 2014-02-09 FALSE     FALSE             NA   NA                 
##  6 000255… 2014-02-10 FALSE     FALSE             97.9 NA                 
##  7 000255… 2014-02-11 FALSE     FALSE             96.9 NA                 
##  8 000255… 2014-02-12 FALSE     FALSE             97.5 NA                 
##  9 000255… 2014-02-13 FALSE     FALSE             97.1 NA                 
## 10 000255… 2014-02-14 FALSE     FALSE             97.9 NA                 
## # … with 91,425 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[37]]
## # A tibble: 66,044 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 003a81… 2015-11-13 TRUE      FALSE             NA   NA                 
##  2 003a81… 2015-11-14 FALSE     FALSE             97.2 NA                 
##  3 003a81… 2015-11-15 FALSE     FALSE             96.8 NA                 
##  4 003a81… 2015-11-16 FALSE     FALSE             98.1 NA                 
##  5 003a81… 2015-11-17 FALSE     FALSE             97.8 NA                 
##  6 003a81… 2015-11-18 FALSE     FALSE             98.8 NA                 
##  7 003a81… 2015-11-19 FALSE     FALSE             98.0 NA                 
##  8 003a81… 2015-11-20 FALSE     FALSE             98.6 NA                 
##  9 003a81… 2015-11-21 FALSE     FALSE             98.9 NA                 
## 10 003a81… 2015-11-22 FALSE     FALSE             NA   NA                 
## # … with 66,034 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[38]]
## # A tibble: 109,770 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 000416… 2017-02-06 TRUE      FALSE             95.0 1970-01-01 06:04:00
##  2 000416… 2017-02-07 FALSE     FALSE             94.8 1970-01-01 06:22:00
##  3 000416… 2017-02-08 FALSE     FALSE             96.0 1970-01-01 06:16:00
##  4 000416… 2017-02-09 FALSE     FALSE             95.4 1970-01-01 06:16:00
##  5 000416… 2017-02-10 FALSE     FALSE             NA   NA                 
##  6 000416… 2017-02-11 FALSE     FALSE             NA   NA                 
##  7 000416… 2017-02-12 FALSE     FALSE             94.4 1970-01-01 08:28:00
##  8 000416… 2017-02-13 FALSE     FALSE             96.1 1970-01-01 06:17:00
##  9 000416… 2017-02-14 FALSE     FALSE             95.0 1970-01-01 06:18:00
## 10 000416… 2017-02-15 FALSE     FALSE             96.3 1970-01-01 06:22:00
## # … with 109,760 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[39]]
## # A tibble: 42,598 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 000d6d… 2018-08-11 TRUE      FALSE             NA   NA                 
##  2 000d6d… 2018-08-12 FALSE     FALSE             NA   NA                 
##  3 000d6d… 2018-08-13 FALSE     FALSE             NA   NA                 
##  4 000d6d… 2018-08-14 FALSE     FALSE             NA   NA                 
##  5 000d6d… 2018-08-15 FALSE     FALSE             NA   NA                 
##  6 000d6d… 2018-08-17 FALSE     FALSE             98.6 1970-01-01 18:25:00
##  7 000d6d… 2018-08-18 FALSE     FALSE             97.2 1970-01-01 07:18:00
##  8 000d6d… 2018-08-19 FALSE     FALSE             96.4 1970-01-01 20:48:00
##  9 000d6d… 2018-08-22 FALSE     FALSE             97.7 1970-01-01 22:17:00
## 10 000d6d… 2018-08-23 FALSE     FALSE             97.6 1970-01-01 19:26:00
## # … with 42,588 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[40]]
## # A tibble: 80,661 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 0014ae… 2016-03-08 TRUE      FALSE               NA NA                 
##  2 0014ae… 2016-03-09 FALSE     FALSE               NA NA                 
##  3 0014ae… 2016-03-10 FALSE     FALSE               NA NA                 
##  4 0014ae… 2016-03-11 FALSE     FALSE               NA NA                 
##  5 0014ae… 2016-03-12 FALSE     FALSE               NA NA                 
##  6 0014ae… 2016-04-04 TRUE      FALSE               NA NA                 
##  7 0014ae… 2016-04-05 FALSE     FALSE               NA NA                 
##  8 0014ae… 2016-04-06 FALSE     FALSE               NA NA                 
##  9 0014ae… 2016-04-07 FALSE     FALSE               NA NA                 
## 10 0014ae… 2016-04-08 FALSE     FALSE               NA NA                 
## # … with 80,651 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[41]]
## # A tibble: 106,090 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 0009fe… 2016-09-15 TRUE      FALSE               NA NA                 
##  2 0009fe… 2016-09-16 FALSE     FALSE               NA NA                 
##  3 0009fe… 2016-09-17 FALSE     FALSE               NA NA                 
##  4 0009fe… 2016-09-18 FALSE     FALSE               NA NA                 
##  5 0009fe… 2016-09-19 FALSE     FALSE               NA NA                 
##  6 0009fe… 2016-09-20 FALSE     FALSE               NA NA                 
##  7 0009fe… 2016-10-14 TRUE      FALSE               NA NA                 
##  8 0009fe… 2016-10-15 FALSE     FALSE               NA NA                 
##  9 0009fe… 2016-10-16 FALSE     FALSE               NA NA                 
## 10 0009fe… 2016-10-17 FALSE     FALSE               NA NA                 
## # … with 106,080 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[42]]
## # A tibble: 124,853 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 00033c… 2017-07-01 TRUE      FALSE               NA NA                 
##  2 00033c… 2017-07-29 TRUE      FALSE               NA NA                 
##  3 00033c… 2017-07-30 FALSE     FALSE               NA NA                 
##  4 00033c… 2017-07-31 FALSE     FALSE               NA NA                 
##  5 00033c… 2017-08-11 TRUE      FALSE               NA NA                 
##  6 00033c… 2017-08-25 TRUE      FALSE               NA NA                 
##  7 00033c… 2017-08-26 FALSE     FALSE               NA NA                 
##  8 00033c… 2017-08-27 FALSE     FALSE               NA NA                 
##  9 00033c… 2017-08-28 FALSE     FALSE               NA NA                 
## 10 00033c… 2017-08-29 FALSE     FALSE               NA NA                 
## # … with 124,843 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[43]]
## # A tibble: 66,191 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 006285… 2016-06-29 TRUE      FALSE             97.3 1970-01-01 05:52:00
##  2 006285… 2016-06-30 FALSE     FALSE             97.6 1970-01-01 06:41:00
##  3 006285… 2016-07-01 FALSE     FALSE             NA   NA                 
##  4 006285… 2016-07-02 FALSE     FALSE             97.9 1970-01-01 07:59:00
##  5 006285… 2016-07-03 FALSE     FALSE             NA   NA                 
##  6 006285… 2016-07-04 FALSE     FALSE             NA   NA                 
##  7 006285… 2016-07-05 FALSE     FALSE             NA   NA                 
##  8 006285… 2016-07-06 FALSE     FALSE             97.8 1970-01-01 07:02:00
##  9 006285… 2016-07-07 FALSE     FALSE             97.8 1970-01-01 06:49:00
## 10 006285… 2016-07-08 FALSE     FALSE             97.7 1970-01-01 07:21:00
## # … with 66,181 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[44]]
## # A tibble: 121,641 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 001ffc… 2015-03-25 TRUE      FALSE             98   NA                 
##  2 001ffc… 2015-03-26 FALSE     FALSE             97.6 NA                 
##  3 001ffc… 2015-03-27 FALSE     FALSE             97.6 NA                 
##  4 001ffc… 2015-03-28 FALSE     FALSE             97.5 NA                 
##  5 001ffc… 2015-03-29 FALSE     FALSE             97.7 NA                 
##  6 001ffc… 2015-03-30 FALSE     FALSE             97.9 NA                 
##  7 001ffc… 2015-03-31 FALSE     FALSE             97.7 NA                 
##  8 001ffc… 2015-04-01 FALSE     FALSE             97.6 NA                 
##  9 001ffc… 2015-04-02 FALSE     FALSE             98   NA                 
## 10 001ffc… 2015-04-03 FALSE     FALSE             97.3 NA                 
## # … with 121,631 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[45]]
## # A tibble: 26,710 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 002ad2… 2014-09-01 TRUE      FALSE             NA   NA                 
##  2 002ad2… 2014-09-02 FALSE     FALSE             NA   NA                 
##  3 002ad2… 2014-09-03 FALSE     FALSE             NA   NA                 
##  4 002ad2… 2014-09-04 FALSE     FALSE             97.7 NA                 
##  5 002ad2… 2014-09-05 FALSE     FALSE             NA   NA                 
##  6 002ad2… 2014-09-06 FALSE     FALSE             97.4 NA                 
##  7 002ad2… 2014-09-07 FALSE     FALSE             98.0 NA                 
##  8 002ad2… 2014-09-08 FALSE     FALSE             97.6 NA                 
##  9 002ad2… 2014-09-09 FALSE     FALSE             97.2 NA                 
## 10 002ad2… 2014-09-10 FALSE     FALSE             97.8 NA                 
## # … with 26,700 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[46]]
## # A tibble: 75,428 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 007b9c… 2017-11-12 TRUE      FALSE             NA   NA                 
##  2 007b9c… 2017-11-13 FALSE     FALSE             NA   NA                 
##  3 007b9c… 2017-11-14 FALSE     FALSE             NA   NA                 
##  4 007b9c… 2018-01-01 TRUE      FALSE             NA   NA                 
##  5 007b9c… 2018-01-02 FALSE     FALSE             NA   NA                 
##  6 007b9c… 2018-01-03 FALSE     FALSE             NA   NA                 
##  7 007b9c… 2018-01-27 TRUE      FALSE             NA   NA                 
##  8 007b9c… 2018-01-28 FALSE     FALSE             NA   NA                 
##  9 007b9c… 2018-01-29 FALSE     FALSE             97.2 1970-01-01 17:44:00
## 10 007b9c… 2018-01-30 FALSE     FALSE             97.2 1970-01-01 17:44:00
## # … with 75,418 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[47]]
## # A tibble: 94,737 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 001a02… 2015-12-17 TRUE      FALSE             NA   NA                 
##  2 001a02… 2015-12-18 FALSE     FALSE             NA   NA                 
##  3 001a02… 2015-12-19 FALSE     FALSE             NA   NA                 
##  4 001a02… 2015-12-20 FALSE     FALSE             NA   NA                 
##  5 001a02… 2015-12-21 FALSE     FALSE             NA   NA                 
##  6 001a02… 2015-12-22 FALSE     FALSE             NA   NA                 
##  7 001a02… 2015-12-23 FALSE     FALSE             NA   NA                 
##  8 001a02… 2015-12-24 FALSE     FALSE             96.8 NA                 
##  9 001a02… 2015-12-25 FALSE     FALSE             97.3 NA                 
## 10 001a02… 2015-12-26 FALSE     FALSE             97.8 NA                 
## # … with 94,727 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[48]]
## # A tibble: 65,067 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 0061a0… 2015-08-10 TRUE      FALSE             NA   NA                 
##  2 0061a0… 2015-08-11 FALSE     FALSE             NA   NA                 
##  3 0061a0… 2015-08-12 FALSE     FALSE             NA   NA                 
##  4 0061a0… 2015-08-23 FALSE     FALSE             NA   NA                 
##  5 0061a0… 2015-08-27 FALSE     FALSE             97.7 NA                 
##  6 0061a0… 2015-08-28 FALSE     FALSE             98.1 NA                 
##  7 0061a0… 2015-08-29 FALSE     FALSE             98.1 NA                 
##  8 0061a0… 2015-08-30 FALSE     FALSE             97.9 NA                 
##  9 0061a0… 2015-08-31 FALSE     FALSE             97.7 NA                 
## 10 0061a0… 2015-09-01 FALSE     FALSE             97.5 NA                 
## # … with 65,057 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[49]]
## # A tibble: 76,908 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 009542… 2016-08-21 TRUE      FALSE               NA NA                 
##  2 009542… 2016-08-22 FALSE     FALSE               NA NA                 
##  3 009542… 2016-08-23 FALSE     FALSE               NA NA                 
##  4 009542… 2016-09-05 FALSE     FALSE               NA NA                 
##  5 009542… 2016-09-13 FALSE     FALSE               NA NA                 
##  6 009542… 2016-09-16 FALSE     FALSE               NA NA                 
##  7 00b4c8… 2015-11-26 TRUE      FALSE               NA NA                 
##  8 00b4c8… 2015-11-27 FALSE     FALSE               NA NA                 
##  9 00b4c8… 2015-11-28 FALSE     FALSE               NA NA                 
## 10 00b4c8… 2015-12-26 TRUE      FALSE               NA NA                 
## # … with 76,898 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[50]]
## # A tibble: 54,961 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 00b8be… 2017-10-24 TRUE      FALSE               NA NA                 
##  2 00b8be… 2017-10-25 FALSE     FALSE               NA NA                 
##  3 00b8be… 2017-10-26 FALSE     FALSE               NA NA                 
##  4 00b8be… 2017-10-27 FALSE     FALSE               NA NA                 
##  5 00b8be… 2017-10-28 FALSE     FALSE               NA NA                 
##  6 00b8be… 2017-10-29 FALSE     FALSE               NA NA                 
##  7 00b8be… 2017-10-30 FALSE     FALSE               NA NA                 
##  8 00b8be… 2017-10-31 FALSE     FALSE               NA NA                 
##  9 00b8be… 2017-11-01 FALSE     FALSE               NA NA                 
## 10 00b8be… 2017-11-02 FALSE     FALSE               NA NA                 
## # … with 54,951 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[51]]
## # A tibble: 38,091 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 00b400… 2014-06-24 TRUE      FALSE               NA NA                 
##  2 00b400… 2014-06-25 FALSE     FALSE               NA NA                 
##  3 00b400… 2014-06-26 FALSE     FALSE               NA NA                 
##  4 00b400… 2014-07-21 TRUE      FALSE               NA NA                 
##  5 00b400… 2014-07-22 FALSE     FALSE               NA NA                 
##  6 00b400… 2014-07-23 FALSE     FALSE               NA NA                 
##  7 00b400… 2014-07-24 FALSE     FALSE               NA NA                 
##  8 00b400… 2014-07-25 FALSE     FALSE               NA NA                 
##  9 00b400… 2014-07-26 FALSE     FALSE               NA NA                 
## 10 00b400… 2014-07-27 FALSE     FALSE               NA NA                 
## # … with 38,081 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[52]]
## # A tibble: 65,793 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 00190b… 2017-06-26 TRUE      FALSE               NA NA                 
##  2 00190b… 2017-06-27 FALSE     FALSE               NA NA                 
##  3 00190b… 2017-06-28 FALSE     FALSE               NA NA                 
##  4 00190b… 2017-06-29 FALSE     FALSE               NA NA                 
##  5 00190b… 2017-06-30 FALSE     FALSE               NA NA                 
##  6 00190b… 2017-07-01 FALSE     FALSE               NA NA                 
##  7 00190b… 2017-07-04 FALSE     FALSE               NA NA                 
##  8 00190b… 2017-07-05 FALSE     FALSE               NA NA                 
##  9 00190b… 2017-07-10 FALSE     FALSE               NA NA                 
## 10 00190b… 2017-07-11 FALSE     FALSE               NA NA                 
## # … with 65,783 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[53]]
## # A tibble: 79,901 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 00027f… 2016-06-01 TRUE      FALSE             98.5 1970-01-01 16:44:00
##  2 00027f… 2016-06-02 FALSE     FALSE             NA   NA                 
##  3 00027f… 2016-06-03 FALSE     FALSE             NA   NA                 
##  4 00027f… 2016-06-04 FALSE     FALSE             NA   NA                 
##  5 00027f… 2016-06-05 FALSE     FALSE             NA   NA                 
##  6 00027f… 2016-06-06 FALSE     FALSE             97.7 1970-01-01 07:19:00
##  7 00027f… 2016-06-07 FALSE     FALSE             97.9 1970-01-01 07:22:00
##  8 00027f… 2016-06-08 FALSE     FALSE             98.2 1970-01-01 06:45:00
##  9 00027f… 2016-06-09 FALSE     FALSE             97.9 1970-01-01 06:44:00
## 10 00027f… 2016-06-10 FALSE     FALSE             98.4 1970-01-01 06:40:00
## # … with 79,891 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[54]]
## # A tibble: 102,901 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 0091f3… 2015-04-19 TRUE      FALSE               NA NA                 
##  2 0091f3… 2015-04-20 FALSE     FALSE               NA NA                 
##  3 0091f3… 2015-04-21 FALSE     FALSE               NA NA                 
##  4 0091f3… 2015-04-22 FALSE     FALSE               NA NA                 
##  5 0091f3… 2015-04-23 FALSE     FALSE               NA NA                 
##  6 0091f3… 2015-04-24 FALSE     FALSE               NA NA                 
##  7 0091f3… 2015-04-25 FALSE     FALSE               NA NA                 
##  8 0091f3… 2015-04-27 FALSE     FALSE               NA NA                 
##  9 0091f3… 2015-05-01 FALSE     FALSE               NA NA                 
## 10 0091f3… 2015-05-02 FALSE     FALSE               NA NA                 
## # … with 102,891 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[55]]
## # A tibble: 45,864 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 005bfc… 2015-04-04 TRUE      FALSE               NA NA                 
##  2 005bfc… 2015-04-05 FALSE     FALSE               NA NA                 
##  3 005bfc… 2015-04-06 FALSE     FALSE               NA NA                 
##  4 005bfc… 2015-04-07 FALSE     FALSE               NA NA                 
##  5 005bfc… 2015-04-08 FALSE     FALSE               NA NA                 
##  6 005bfc… 2015-04-14 FALSE     FALSE               NA NA                 
##  7 005bfc… 2015-05-03 TRUE      FALSE               NA NA                 
##  8 005bfc… 2015-05-04 FALSE     FALSE               NA NA                 
##  9 005bfc… 2015-05-05 FALSE     FALSE               NA NA                 
## 10 005bfc… 2015-05-06 FALSE     FALSE               NA NA                 
## # … with 45,854 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[56]]
## # A tibble: 64,105 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 0047a0… 2018-04-14 TRUE      FALSE               NA NA                 
##  2 0047a0… 2018-04-15 FALSE     FALSE               NA NA                 
##  3 0047a0… 2018-04-16 FALSE     FALSE               NA NA                 
##  4 0047a0… 2018-04-17 FALSE     FALSE               NA NA                 
##  5 0047a0… 2018-04-18 FALSE     FALSE               NA NA                 
##  6 0047a0… 2018-04-21 FALSE     FALSE               NA NA                 
##  7 0047a0… 2018-04-24 FALSE     FALSE               NA NA                 
##  8 0047a0… 2018-04-25 FALSE     FALSE               NA NA                 
##  9 0047a0… 2018-04-26 FALSE     FALSE               NA NA                 
## 10 0047a0… 2018-04-27 FALSE     FALSE               NA NA                 
## # … with 64,095 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[57]]
## # A tibble: 108,004 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 000117… 2016-11-15 TRUE      FALSE             NA   NA                 
##  2 000117… 2016-11-16 FALSE     FALSE             97.0 1970-01-01 06:05:00
##  3 000117… 2016-11-17 FALSE     FALSE             97.2 1970-01-01 06:09:00
##  4 000117… 2016-11-18 FALSE     FALSE             97.0 1970-01-01 06:10:00
##  5 000117… 2016-11-19 FALSE     FALSE             97.3 1970-01-01 07:57:00
##  6 000117… 2016-11-20 FALSE     FALSE             97.5 1970-01-01 08:01:00
##  7 000117… 2016-11-21 FALSE     FALSE             97.0 1970-01-01 07:05:00
##  8 000117… 2016-11-22 FALSE     FALSE             96.9 1970-01-01 07:33:00
##  9 000117… 2016-11-23 FALSE     FALSE             97.1 1970-01-01 06:09:00
## 10 000117… 2016-11-24 FALSE     FALSE             97.1 1970-01-01 07:39:00
## # … with 107,994 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[58]]
## # A tibble: 69,189 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 00a812… 2015-03-14 TRUE      FALSE               NA NA                 
##  2 00a812… 2015-03-15 FALSE     FALSE               NA NA                 
##  3 00a812… 2015-03-16 FALSE     FALSE               NA NA                 
##  4 00a812… 2015-03-17 FALSE     FALSE               NA NA                 
##  5 00a812… 2015-03-18 FALSE     FALSE               NA NA                 
##  6 00a812… 2015-03-21 FALSE     FALSE               NA NA                 
##  7 00a812… 2015-03-23 FALSE     FALSE               NA NA                 
##  8 00a812… 2015-04-09 FALSE     FALSE               NA NA                 
##  9 00a812… 2015-04-12 TRUE      FALSE               NA NA                 
## 10 00a812… 2015-04-13 FALSE     FALSE               NA NA                 
## # … with 69,179 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[59]]
## # A tibble: 94,125 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 000c6d… 2017-07-15 TRUE      FALSE               NA NA                 
##  2 000c6d… 2017-07-16 FALSE     FALSE               NA NA                 
##  3 000c6d… 2017-07-17 FALSE     FALSE               NA NA                 
##  4 000c6d… 2017-07-18 FALSE     FALSE               NA NA                 
##  5 000c6d… 2017-08-12 FALSE     FALSE               NA NA                 
##  6 000c6d… 2017-08-28 FALSE     FALSE               NA NA                 
##  7 000c6d… 2017-09-02 TRUE      FALSE               NA NA                 
##  8 000c6d… 2017-09-03 FALSE     FALSE               NA NA                 
##  9 000c6d… 2017-09-04 FALSE     FALSE               NA NA                 
## 10 000c6d… 2017-09-05 FALSE     FALSE               NA NA                 
## # … with 94,115 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[60]]
## # A tibble: 120,286 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 000864… 2014-08-24 TRUE      FALSE             NA   NA                 
##  2 000864… 2014-08-25 FALSE     FALSE             NA   NA                 
##  3 000864… 2014-08-26 FALSE     FALSE             NA   NA                 
##  4 0014b8… 2015-01-27 TRUE      TRUE              NA   NA                 
##  5 0014b8… 2015-02-01 FALSE     FALSE             NA   NA                 
##  6 0014b8… 2015-02-02 FALSE     FALSE             NA   NA                 
##  7 0014b8… 2015-02-03 FALSE     FALSE             97.2 NA                 
##  8 0014b8… 2015-02-04 FALSE     FALSE             97.5 NA                 
##  9 0014b8… 2015-02-05 FALSE     FALSE             97.5 NA                 
## 10 0014b8… 2015-02-06 FALSE     FALSE             97.5 NA                 
## # … with 120,276 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[61]]
## # A tibble: 73,012 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 003402… 2015-09-07 TRUE      FALSE               NA NA                 
##  2 003402… 2015-09-08 FALSE     FALSE               NA NA                 
##  3 003402… 2015-09-09 FALSE     FALSE               NA NA                 
##  4 003402… 2015-09-10 FALSE     FALSE               NA NA                 
##  5 003402… 2015-10-14 TRUE      FALSE               NA NA                 
##  6 003402… 2015-10-15 FALSE     FALSE               NA NA                 
##  7 003402… 2015-10-16 FALSE     FALSE               NA NA                 
##  8 003402… 2015-10-17 FALSE     FALSE               NA NA                 
##  9 003402… 2015-11-17 TRUE      FALSE               NA NA                 
## 10 003402… 2015-11-18 FALSE     FALSE               NA NA                 
## # … with 73,002 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[62]]
## # A tibble: 111,849 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 00017c… 2016-12-15 TRUE      FALSE               NA NA                 
##  2 00017c… 2016-12-16 FALSE     FALSE               NA NA                 
##  3 00017c… 2016-12-17 FALSE     FALSE               NA NA                 
##  4 00017c… 2016-12-18 FALSE     FALSE               NA NA                 
##  5 00017c… 2016-12-19 FALSE     FALSE               NA NA                 
##  6 00017c… 2016-12-20 FALSE     FALSE               NA NA                 
##  7 00017c… 2016-12-24 FALSE     FALSE               NA NA                 
##  8 00017c… 2016-12-27 FALSE     FALSE               NA NA                 
##  9 00017c… 2016-12-30 FALSE     FALSE               NA NA                 
## 10 00017c… 2017-01-03 FALSE     FALSE               NA NA                 
## # … with 111,839 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[63]]
## # A tibble: 40,759 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 000230… 2014-09-01 FALSE     FALSE               NA NA                 
##  2 00354b… 2015-12-15 TRUE      FALSE               NA NA                 
##  3 00354b… 2015-12-16 FALSE     FALSE               NA NA                 
##  4 00354b… 2015-12-17 FALSE     FALSE               NA NA                 
##  5 00354b… 2015-12-18 FALSE     FALSE               NA NA                 
##  6 00354b… 2015-12-19 FALSE     FALSE               NA NA                 
##  7 00354b… 2015-12-20 FALSE     FALSE               NA NA                 
##  8 00354b… 2016-01-13 TRUE      FALSE               NA NA                 
##  9 00354b… 2016-01-14 FALSE     FALSE               NA NA                 
## 10 00354b… 2016-01-15 FALSE     FALSE               NA NA                 
## # … with 40,749 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
## 
## [[64]]
## # A tibble: 45,786 x 37
##    user_id date       first_day conception temperature temp_time          
##    <chr>   <date>     <lgl>     <lgl>            <dbl> <dttm>             
##  1 000628… 2016-06-18 TRUE      FALSE               NA NA                 
##  2 000628… 2016-06-19 FALSE     FALSE               NA NA                 
##  3 000628… 2016-06-20 FALSE     FALSE               NA NA                 
##  4 000628… 2016-06-21 FALSE     FALSE               NA NA                 
##  5 000628… 2016-06-22 FALSE     FALSE               NA NA                 
##  6 000628… 2016-06-23 FALSE     FALSE               NA NA                 
##  7 000628… 2016-06-24 FALSE     FALSE               NA NA                 
##  8 000628… 2016-06-25 FALSE     FALSE               NA NA                 
##  9 000628… 2016-06-26 FALSE     FALSE               NA NA                 
## 10 000628… 2016-06-27 FALSE     FALSE               NA NA                 
## # … with 45,776 more rows, and 31 more variables: temp_source <int>,
## #   questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## #   fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## #   cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## #   opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## #   sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## #   preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## #   is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## #   cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## #   cycleday_from_end <dbl>, fertility_counting <dbl>,
## #   fertility_counting_n <dbl>
stopImplicitCluster()

3.3.2 Reproductive objective scores per cycle

# Score the sexual activity logged in one cycle as evidence for or against
# trying to conceive (positive values point towards trying to get pregnant).
#
# Arguments (parallel vectors, one element per logged day):
#   fertility    weight applied to days with sex == 1 (counted negatively)
#                and sex == 3 (counted negatively, at half weight)
#   fertility_n  weight applied to days with sex == 2 (counted positively)
#   sex          sex code of the day; a single day with sex == 4 adds 2
#
# Returns one number. NA values in the inputs are not removed, so they
# propagate to the result.
reproductive_obj_score = function(fertility, fertility_n, sex){
  unprotected_term = sum(fertility_n * (sex == 2))
  protected_term = sum(fertility * (sex == 1))
  withdrawal_term = sum(fertility * (sex == 3))
  code4_logged = any(sex == 4)
  
  unprotected_term - protected_term - 0.5 * withdrawal_term + 2 * code4_logged
}

# Compute one reproductive-objective score per cycle, processing the per-file
# day tables in parallel.
tmp_folder = paste0(IO$tmp_data, "Days_with_fertility/")
days_files = list.files(tmp_folder)

cl = makeCluster(par$n_cores)
registerDoParallel(cl)

# Each iteration reads one feather file and returns a table with one row per
# cycle_id; the per-file tables are row-bound into cycles_agg.
cycles_agg = foreach(file  = days_files, .combine = rbind, .packages = c('plyr','dplyr','feather')) %dopar%
{
  days = read_feather(path = paste0(tmp_folder,file))
  
  cycles_agg = ddply(days,
                     "cycle_id",
                     summarise,
                     reprod_obj_score = reproductive_obj_score(fertility_counting, fertility_counting_n, sex))
  
  return(cycles_agg)
}

# The cluster was created explicitly with makeCluster(), so it must be shut
# down with stopCluster(). stopImplicitCluster() only stops a cluster that
# registerDoParallel() created by itself and would leave these workers running.
stopCluster(cl)

# Attach the per-cycle score to the cycles table (NA where a cycle has no row
# in cycles_agg), then save the table and copy it to the tmp folder.
cycle_score_rows = match(cycles$cycle_id, cycles_agg$cycle_id)
cycles$reprod_obj_score_counting = cycles_agg$reprod_obj_score[cycle_score_rows]

cycles_path = paste0(IO$output_data, "cycles.feather")
write_feather(cycles, path = cycles_path)
ok = file.copy(from = cycles_path,
               to = paste0(IO$tmp_data, "cycles_with_reprod_obj_scores.feather"),
               overwrite = TRUE)

3.3.3 Reproductive objective scores per pregnancy

# Per pregnancy: median of the per-cycle counting scores.
score_agg = aggregate(reprod_obj_score_counting ~ pregnancy_id,
                      data = cycles, FUN = median, na.rm = TRUE)
preg_score_rows = match(pregnancies$pregnancy_id, score_agg$pregnancy_id)
pregnancies$reprod_obj_score_counting = score_agg$reprod_obj_score_counting[preg_score_rows]

# Per pregnancy: sum of n_tot_sex over its cycles.
sex_agg = aggregate(n_tot_sex ~ pregnancy_id,
                    data = cycles, FUN = sum, na.rm = TRUE)
preg_sex_rows = match(pregnancies$pregnancy_id, sex_agg$pregnancy_id)
pregnancies$n_tot_sex = sex_agg$n_tot_sex[preg_sex_rows]

# Turn the score into three classes: (-Inf, -0.1], (-0.1, 0.1], (0.1, Inf).
counting_breaks = c(-Inf, -0.1,0.1,Inf)
counting_labels = c("avoid_preg","unknown","get_preg")
pregnancies$reprod_obj_counting = cut(pregnancies$reprod_obj_score_counting,
                                      breaks = counting_breaks,
                                      labels = counting_labels)

# Histogram of the score, coloured by the resulting class.
ggplot(data = pregnancies,
       mapping = aes(x = reprod_obj_score_counting, fill = reprod_obj_counting)) +
  geom_histogram(binwidth = 0.1)
## Warning: Removed 30854 rows containing non-finite values (stat_bin).

# Save the pregnancies table and copy this stage's version to the tmp folder.
pregnancies_path = paste0(IO$output_data,"pregnancies.feather")
write_feather(pregnancies, path = pregnancies_path)
file.copy(from = pregnancies_path,
          to = paste0(IO$tmp_data,"pregnancies_with_3_cycles_score.feather"),
          overwrite = TRUE)
## [1] TRUE

3.3.4 Consistency between user-declared objectives and computed objectives

# Distribution of the counting score (exact zeros excluded), one panel per
# app-declared objective, coloured by the computed class.
# facet_grid()'s argument is `scales`; it is spelled out in full instead of
# relying on partial matching of `scale`.
ggplot(pregnancies[which(pregnancies$reprod_obj_score_counting != 0),], 
       aes(x = reprod_obj_score_counting, fill = reprod_obj_counting))+
  geom_histogram(position = "identity", binwidth = 0.1, alpha = 1) +
  facet_grid(reprod_obj_app ~ . , scales = "free")+
  geom_vline(xintercept = 0)+
  xlim(c(-3,3))
## Warning: Removed 364 rows containing non-finite values (stat_bin).
## Warning: Removed 30 rows containing missing values (geom_bar).

3.3.5 Pregnancy outcomes per reproductive objective

# Histogram of preg_duration (bins of width 7), one panel per computed
# objective class.
# facet_grid()'s argument is `scales`; it is spelled out in full instead of
# relying on partial matching of `scale`.
ggplot(pregnancies, aes(x = preg_duration, fill = reprod_obj_counting) )+
  geom_histogram(position = "identity",binwidth = 7, alpha = 0.5) +
  facet_grid(reprod_obj_counting ~ . , scales = "free")+
  xlim(0,1000)
## Warning: Removed 93929 rows containing non-finite values (stat_bin).
## Warning: Removed 8 rows containing missing values (geom_bar).

# Counts of each pregnancy outcome, one panel per computed objective class.
# facet_grid()'s argument is `scales`; it is spelled out in full instead of
# relying on partial matching of `scale`.
ggplot(pregnancies, aes(x = preg_outcome, fill = reprod_obj_counting) )+
  geom_bar() +
  facet_grid(reprod_obj_counting ~ . , scales = "free")

Here again, it seems that the users who are trying to avoid pregnancy have proportionally fewer induced or spontaneous pregnancy losses.

3.4 Based on the sexual behavior within the cycle at which the users got pregnant

The 3 previous cycles may not reflect the intentions of the user in the cycle in which they got pregnant, as they may have changed their mind and gotten pregnant quite rapidly.

However, we may be able to infer that they were trying to avoid a pregnancy if they logged protected sex or withdrawal before they logged their first positive pregnancy test. If they did, it may mean that these users got pregnant because they mis-estimated their fertile window.

# Count, per pregnancy cycle, the logged sex of each type before the first
# positive pregnancy test, split by whether fertile mucus was logged that day.

# Only cycles with a recorded pregnancy outcome are considered.
cycle_ids = cycles$cycle_id[!is.na(cycles$preg_outcome)]
input_days_folder = paste0(IO$output_data,"Days/")

tic()
cl = makeCluster(par$n_cores)
registerDoParallel(cl)

days_files = list.files(input_days_folder)

# Each iteration processes one feather file of days and returns one row per
# cycle_id; the per-file tables are row-bound into cycles_agg.
cycles_agg = foreach(file  = days_files, .combine = rbind, .packages = c('plyr','dplyr','feather')) %dopar%
{
  days = read_feather(path = paste0(input_days_folder,file))
  
  # Missing mucus entries count as 0, so fertile_mucus is never NA.
  days$fluid_eggwhite[is.na(days$fluid_eggwhite)] = 0
  days$fluid_watery[is.na(days$fluid_watery)] = 0
  days$fertile_mucus = days$fluid_eggwhite + days$fluid_watery
  
  # Keep the days of the selected cycles ...
  j = which(days$cycle_id %in% cycle_ids)
  days = days[j,]
  # ... and, among those, the days strictly before the cycle's
  # day_first_pos_preg_test.
  days_first_pos_preg_test = cycles$day_first_pos_preg_test[match(days$cycle_id, cycles$cycle_id)]
  j = which(days$cycleday < days_first_pos_preg_test)
  
  # sex codes: 1 = protected, 2 = unprotected, 3 = withdrawal, 4 = insemination.
  cycles_agg = ddply(days[j,],
                     "cycle_id",
                     summarise,
                     n_prot_sex_no_mucus = sum((sex == 1) & (fertile_mucus == 0), na.rm = TRUE),
                     n_withdrawal_no_mucus = sum((sex == 3) & (fertile_mucus == 0), na.rm = TRUE),
                     n_unprot_sex_no_mucus = sum((sex == 2) & (fertile_mucus == 0), na.rm = TRUE),
                     n_insemination_no_mucus = sum((sex == 4) & (fertile_mucus == 0), na.rm = TRUE),
                     n_prot_sex_mucus = sum((sex == 1) & (fertile_mucus > 0), na.rm = TRUE),
                     n_withdrawal_mucus = sum((sex == 3) & (fertile_mucus > 0), na.rm = TRUE),
                     n_unprot_sex_mucus = sum((sex == 2) & (fertile_mucus > 0), na.rm = TRUE),
                     n_insemination_mucus = sum((sex == 4) & (fertile_mucus > 0), na.rm = TRUE))
  
  return(cycles_agg)
}

# The cluster was created explicitly with makeCluster(), so it must be shut
# down with stopCluster(). stopImplicitCluster() only stops a cluster that
# registerDoParallel() created by itself and would leave these workers running.
stopCluster(cl)
toc()
## 153.937 sec elapsed
# Long format (one row per cycle and count variable), then one histogram
# panel per count variable.
# facet_grid()'s argument is `scales`; it is spelled out in full instead of
# relying on partial matching of `scale`.
cycles_agg_long = melt(cycles_agg, id.vars = "cycle_id")
ggplot(cycles_agg_long, aes(x = value, fill = variable)) +
  geom_histogram() +
  facet_grid(variable ~ ., scales = "free") +
  xlim(c(0.2,30))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 572967 rows containing non-finite values (stat_bin).
## Warning: Removed 16 rows containing missing values (geom_bar).

#table(pmin(5, cycles_agg$n_prot_sex), pmin(10, cycles_agg$n_unprot_sex))
# Row of cycles_agg matching each pregnancy (NA when there is none).
m = match(pregnancies$pregnancy_id, cycles_agg$cycle_id)

# Copy every column of cycles_agg except the first one into pregnancies.
# Columns are addressed by name with [[ ]] rather than by building and
# evaluating code strings with eval(parse()).
colnames_to_add = colnames(cycles_agg)[-1]
for(col_ in colnames_to_add){
  pregnancies[[col_]] = cycles_agg[[col_]][m]
}
# In-cycle score: protected sex and withdrawal count negatively, with twice
# the weight on days with fertile mucus.
in_cycle_score = with(
  pregnancies,
  0 - 2 * n_prot_sex_mucus - 1 * n_prot_sex_no_mucus -
    1 * n_withdrawal_mucus - 0.5 * n_withdrawal_no_mucus
)

# Overrides, applied in this order (a later one wins):
# more than one unprotected sex on fertile-mucus days -> 2
in_cycle_score[which(pregnancies$n_unprot_sex_mucus > 1)] = 2

# any logged insemination -> 10
n_insemination = pregnancies$n_insemination_no_mucus + pregnancies$n_insemination_mucus
in_cycle_score[which(n_insemination > 0)] = 10

# pregnancies whose score is still NA -> 0
in_cycle_score[which(is.na(in_cycle_score))] = 0

pregnancies$reprod_obj_score_in_cycle = in_cycle_score

# Three classes: (-Inf, -0.5], (-0.5, 0.5], (0.5, Inf).
pregnancies$reprod_obj_in_cycle = cut(in_cycle_score,
                                      breaks = c(-Inf, -0.5, 0.5, Inf),
                                      labels = c("avoid_preg","unknown","get_preg"))
# Negative in-cycle scores only, one panel per app-declared objective.
# facet_grid()'s argument is `scales`; it is spelled out in full instead of
# relying on partial matching of `scale`.
ggplot(pregnancies[pregnancies$reprod_obj_score_in_cycle <0,], aes(x = reprod_obj_score_in_cycle, fill = reprod_obj_app))+
  geom_histogram(binwidth = 0.5)+
  facet_grid(reprod_obj_app ~. , scales = "free")

# Cross-tabulation: in-cycle class (rows) by app-declared objective (columns).
table(pregnancies$reprod_obj_in_cycle, pregnancies$reprod_obj_app)
##             
##              avoid_preg get_preg  other   preg track_period
##   avoid_preg        934     1277     11   2890          775
##   unknown          7818    33038    206 100661         9207
##   get_preg         2159     4607     23  11000         2320
# Histogram of preg_duration (bins of width 7), one panel per in-cycle class.
# facet_grid()'s argument is `scales`; it is spelled out in full instead of
# relying on partial matching of `scale`.
ggplot(pregnancies, aes(x = preg_duration, fill = reprod_obj_in_cycle) )+
  geom_histogram(position = "identity",binwidth = 7, alpha = 0.5) +
  facet_grid(reprod_obj_in_cycle ~ . , scales = "free")+
  xlim(0,1000)
## Warning: Removed 93929 rows containing non-finite values (stat_bin).
## Warning: Removed 6 rows containing missing values (geom_bar).

# Counts of each pregnancy outcome, one panel per in-cycle class.
# facet_grid()'s argument is `scales`; it is spelled out in full instead of
# relying on partial matching of `scale`.
ggplot(pregnancies, aes(x = preg_outcome, fill = reprod_obj_in_cycle) )+
  geom_bar() +
  facet_grid(reprod_obj_in_cycle ~ . , scales = "free")

3.5 Combining all of these indicators

If a user declared not wanting to get pregnant and logged sexual intercourse in the 3 previous cycles and in the conception cycle that was consistent with this declared objective, we can assume that this user was indeed trying to avoid pregnancy.

If, at the other end of the spectrum, a user declared that they wanted to achieve pregnancy and acted consistently with that objective or received insemination, we can safely assume that they wanted to get pregnant.

In all other cases, we will label the user with an “unknown” reproductive objective for that specific pregnancy.

# Default label; it is overwritten below only for the rows where the declared
# objective and the logged behaviour agree.
pregnancies$reprod_obj = "unknown"

# Rows with no counting class and cycle_nb below 4
# (presumably too few earlier cycles to compute one; verify upstream).
no_counting_class = is.na(pregnancies$reprod_obj_counting) & (pregnancies$cycle_nb < 4)

# Declared "avoid_preg", counting class agrees (or is unavailable as above),
# and the in-cycle class does not contradict it.
declared_avoid = pregnancies$reprod_obj_app == "avoid_preg"
counting_avoid = (pregnancies$reprod_obj_counting == "avoid_preg") | no_counting_class
in_cycle_not_get = pregnancies$reprod_obj_in_cycle %in% c("avoid_preg","unknown")
avoid_rows = which(declared_avoid & counting_avoid & in_cycle_not_get)
pregnancies$reprod_obj[avoid_rows] = "avoid_preg"

# Same rule for the objectives "get_preg" and "preg".
declared_get = pregnancies$reprod_obj_app %in% c("get_preg","preg")
counting_get = (pregnancies$reprod_obj_counting == "get_preg") | no_counting_class
in_cycle_not_avoid = pregnancies$reprod_obj_in_cycle %in% c("get_preg","unknown")
get_rows = which(declared_get & counting_get & in_cycle_not_avoid)
pregnancies$reprod_obj[get_rows] = "get_preg"

table(pregnancies$reprod_obj)
## 
## avoid_preg   get_preg    unknown 
##       3809      71902     101215
# Number of pregnancies per app-declared objective.
table(pregnancies$reprod_obj_app)
## 
##   avoid_preg     get_preg        other         preg track_period 
##        10911        38922          240       114551        12302
# Number of pregnancies per class derived from the counting score (NA rows are not counted).
table(pregnancies$reprod_obj_counting)
## 
## avoid_preg    unknown   get_preg 
##      14456      75303      56313
# Number of pregnancies per class derived from the in-cycle score.
table(pregnancies$reprod_obj_in_cycle)
## 
## avoid_preg    unknown   get_preg 
##       5887     150930      20109
# Save the pregnancies table and copy this stage's version to the tmp folder.
pregnancies_path = paste0(IO$output_data,"pregnancies.feather")
write_feather(pregnancies, path = pregnancies_path)
file.copy(from = pregnancies_path,
          to = paste0(IO$tmp_data,"pregnancies_with_reprod_obj.feather"),
          overwrite = TRUE)
## [1] TRUE
# Look up each user's first pregnancy by the id "<user_id>_<first_cycle_preg>"
# and copy its combined objective (NA when no pregnancy has that id).
first_pregnancy_ids = paste0(users$user_id, "_",users$first_cycle_preg)
first_pregnancy_rows = match(first_pregnancy_ids, pregnancies$pregnancy_id)
users$reprod_obj_at_first_pregnancy = pregnancies$reprod_obj[first_pregnancy_rows]


# Cross-tabulation: combined objective (rows) by app-declared objective (columns).
table(users$reprod_obj_at_first_pregnancy, users$reprod_obj_app)
##             
##              avoid_preg get_preg other  preg track_period
##   avoid_preg       3415        0     0     0            0
##   get_preg            0    17542     0 39159            0
##   unknown          5182    14438   199 46492         9138
# Save the users table and copy this stage's version to the tmp folder.
users_path = paste0(IO$output_data,"users.feather")
write_feather(users, path = users_path)
file.copy(from = users_path,
          to = paste0(IO$tmp_data,"users_with_reprod_obj_at_first_pregnancy.feather"),
          overwrite = TRUE)
## [1] TRUE

3.5.1 Pregnancy outcomes per reproductive objectives (combined indicator)

# Histogram of preg_duration (bins of width 7), one panel per combined
# objective.
# facet_grid()'s argument is `scales`; it is spelled out in full instead of
# relying on partial matching of `scale`.
ggplot(pregnancies, aes(x = preg_duration, fill = reprod_obj) )+
  geom_histogram(position = "identity",binwidth = 7, alpha = 0.5) +
  facet_grid(reprod_obj ~ . , scales = "free")+
  xlim(0,1000)
## Warning: Removed 93929 rows containing non-finite values (stat_bin).
## Warning: Removed 6 rows containing missing values (geom_bar).

# Counts of each pregnancy outcome, one panel per combined objective.
# facet_grid()'s argument is `scales`; it is spelled out in full instead of
# relying on partial matching of `scale`.
ggplot(pregnancies, aes(x = preg_outcome, fill = reprod_obj) )+
  geom_bar() +
  facet_grid(reprod_obj ~ . , scales = "free")

# Default chunk options for what follows: show the code of each chunk (echo)
# and cache chunk results (cache).
knitr::opts_chunk$set(echo = TRUE, cache = TRUE)

4 Pregnancy Outcomes

4.1 Histogram of pregnancy durations

# Reload the users and cycles tables saved by the previous sections.
users = read_feather(path = paste0(IO$output_data, "users.feather"))
cycles = read_feather(path = paste0(IO$output_data, "cycles.feather"))
#load(paste0(IO$output_data, "days.Rdata"), verbose = TRUE)

# Density histogram of cycle_length (1-day bins, x limited to 0-750), one
# panel per preg_test_class.
# facet_grid()'s argument is `scales`; it is spelled out in full instead of
# relying on partial matching of `scale`.
g = ggplot(cycles, aes(x = cycle_length, fill = preg_test_class)) + 
  geom_histogram(aes(y = ..density..), binwidth = 1, position = "identity")+
  xlim(c(0,750))+
  facet_grid(preg_test_class ~ ., scales = "free")
g
## Warning: Removed 142683 rows containing non-finite values (stat_bin).
## Warning: Removed 8 rows containing missing values (geom_bar).

# Overlaid density histograms of cycle length (1-day bins, x restricted to
# 0-150) for the cycles whose `preg_test_class` is not "pregnant".
g <- ggplot(
  cycles[cycles$preg_test_class != "pregnant", ],
  aes(x = cycle_length, fill = preg_test_class)
) +
  geom_histogram(
    aes(y = ..density..),
    binwidth = 1,
    position = "identity",
    alpha = 0.5
  ) +
  xlim(0, 150)
  #facet_grid(preg_test_class ~ .)
g
## Warning: Removed 71625 rows containing non-finite values (stat_bin).
## Warning: Removed 4 rows containing missing values (geom_bar).

# Long-range view: density histogram of cycle length in 7-day bins over
# 0 to 20*28 days, with dashed gray reference lines at the durations listed in
# `dict$pregnancy_timeline`. Axis breaks are placed in days and labelled in
# weeks.
g_hist_lt <- ggplot(cycles, aes(x = cycle_length, fill = preg_test_class)) +
  geom_vline(
    xintercept = dict$pregnancy_timeline$duration_in_days,
    col = "gray",
    linetype = 2
  ) +
  geom_histogram(
    aes(y = ..density..),
    binwidth = 7,
    position = "identity",
    alpha = 0.5
  ) +
  scale_x_continuous(
    breaks = viz$xaxis_m * 28,
    minor_breaks = viz$xaxis_s * 7,
    labels = viz$xaxis_m * 4,
    limits = c(0, 20 * 28)
  ) +
  labs(
    y = "% of cycles",
    x = "cycle or pregnancy duration (in weeks)"
  ) +
  scale_fill_discrete(name = "Cycle label") +
  theme(legend.position = "bottom")
g_hist_lt
## Warning: Removed 155328 rows containing non-finite values (stat_bin).
## Warning: Removed 8 rows containing missing values (geom_bar).

# Short-range view: same histogram with 1-day bins, x limited to 0-150 days
# and the fill legend hidden.
g_hist_st <- ggplot(cycles, aes(x = cycle_length, fill = preg_test_class)) +
  geom_vline(
    xintercept = dict$pregnancy_timeline$duration_in_days,
    col = "gray",
    linetype = 2
  ) +
  geom_histogram(
    aes(y = ..density..),
    binwidth = 1,
    position = "identity",
    alpha = 0.5
  ) +
  scale_x_continuous(
    breaks = viz$xaxis_m * 28,
    minor_breaks = viz$xaxis_s * 7,
    labels = viz$xaxis_m * 4,
    limits = c(0, 150)
  ) +
  labs(
    y = "% of cycles",
    x = "cycle or pregnancy duration (in weeks)"
  ) +
  guides(fill = FALSE)
  #facet_grid(preg_test_class ~ .)
g_hist_st
## Warning: Removed 209196 rows containing non-finite values (stat_bin).
## Warning: Removed 6 rows containing missing values (geom_vline).
## Warning: Removed 8 rows containing missing values (geom_bar).

# Convert the short-range histogram, framed with a gray40 outline, into a grob
# so that it can be embedded in another plot.
g_inset <- ggplotGrob(
  g_hist_st + theme(plot.background = element_rect(colour = "gray40"))
)
## Warning: Removed 209196 rows containing non-finite values (stat_bin).
## Warning: Removed 6 rows containing missing values (geom_vline).
## Warning: Removed 8 rows containing missing values (geom_bar).
# Draw the long-range histogram with the short-range one as an inset covering
# x >= 24*7 and y >= 0.03, extending to the panel edges.
g_hist_lt +
  annotation_custom(
    grob = g_inset,
    xmin = 24 * 7, xmax = Inf,
    ymin = 0.03, ymax = Inf
  )
## Warning: Removed 155328 rows containing non-finite values (stat_bin).

## Warning: Removed 8 rows containing missing values (geom_bar).

# Attach each user's reproductive objective to their cycles.
# NOTE(review): confirm that `users` has a `reprod_obj` column; the nearby code
# only shows `reprod_obj_app` and `reprod_obj_at_first_pregnancy`.
cycles$reprod_obj <- users$reprod_obj[match(cycles$user_id, users$user_id)]

# Long-range density histogram (7-day bins, 0 to 20*28 days) of cycles with a
# known `preg_test_class`, coloured by reproductive objective and split into
# one panel per `preg_test_class`.
# The legend title names the variable actually mapped to `fill`
# (`reprod_obj`) instead of the cycle label.
g_hist_lt <- ggplot(
  cycles[!is.na(cycles$preg_test_class), ],
  aes(x = cycle_length, fill = reprod_obj)
) +
  geom_vline(
    xintercept = dict$pregnancy_timeline$duration_in_days,
    col = "gray",
    linetype = 2
  ) +
  geom_histogram(
    aes(y = ..density..),
    binwidth = 7,
    position = "identity",
    alpha = 0.5
  ) +
  scale_x_continuous(
    breaks = viz$xaxis_m * 28,
    minor_breaks = viz$xaxis_s * 7,
    labels = viz$xaxis_m * 4,
    limits = c(0, 20 * 28)
  ) +
  ylab("% of cycles") +
  xlab("cycle or pregnancy duration (in weeks)") +
  scale_fill_discrete(name = "Reproductive objective") +
  theme(legend.position = "bottom") +
  facet_grid(preg_test_class ~ .)
g_hist_lt

# Short-range density histogram (1-day bins, 0-150 days) of cycles with a
# known `preg_test_class`, coloured by reproductive objective and split into
# one panel per `preg_test_class`.
g_hist_st <- ggplot(
  cycles[!is.na(cycles$preg_test_class), ],
  aes(x = cycle_length, fill = reprod_obj)
) +
  geom_vline(
    xintercept = dict$pregnancy_timeline$duration_in_days,
    col = "gray",
    linetype = 2
  ) +
  geom_histogram(
    aes(y = ..density..),
    binwidth = 1,
    position = "identity",
    alpha = 0.5
  ) +
  scale_x_continuous(
    breaks = viz$xaxis_m * 28,
    minor_breaks = viz$xaxis_s * 7,
    labels = viz$xaxis_m * 4,
    limits = c(0, 150)
  ) +
  labs(
    y = "% of cycles",
    x = "cycle or pregnancy duration (in weeks)"
  ) +
  facet_grid(preg_test_class ~ .)
g_hist_st


# Convert the short-range histogram, framed with a gray40 outline, into a grob
# so that it can be embedded in another plot.
g_inset <- ggplotGrob(
  g_hist_st + theme(plot.background = element_rect(colour = "gray40"))
)


# Draw the long-range histogram with the short-range one as an inset covering
# x >= 24*7 and y >= 0.03, extending to the panel edges.
g_hist_lt +
  annotation_custom(
    grob = g_inset,
    xmin = 24 * 7, xmax = Inf,
    ymin = 0.03, ymax = Inf
  )
# Row indices of the cycles classified as "pregnant".
j <- which(cycles$preg_test_class == "pregnant")

# Share of those cycles per pregnancy outcome: bar heights are counts divided
# by the total count, the y axis is formatted as percentages, bars use the
# colours from `dict$pregnancy_outcomes` and the fill legend is hidden.
ggplot(cycles[j, ], aes(x = preg_outcome, fill = preg_outcome_cat)) +
  geom_bar(aes(y = (..count..) / sum(..count..))) +
  labs(x = "Pregnancy outcome", y = "% cycles") +
  scale_fill_manual(values = dict$pregnancy_outcomes$colors) +
  scale_y_continuous(labels = percent) +
  guides(fill = FALSE)

5 Predictors of pregnancy losses

5.1 Computing statistics for the 4 cycles before 1st pregnancy tests

# Reload the users and cycles tables from the output data folder and list the
# columns available in `cycles`.
users <- read_feather(paste0(IO$output_data, "users.feather"))
cycles <- read_feather(paste0(IO$output_data, "cycles.feather"))

names(cycles)
##  [1] "user_id"                             
##  [2] "start_date"                          
##  [3] "first_day_type"                      
##  [4] "cycle_nb"                            
##  [5] "cycle_id"                            
##  [6] "end_date"                            
##  [7] "cycle_length"                        
##  [8] "n_days_obs"                          
##  [9] "last_obs_day"                        
## [10] "n_pos_preg_test"                     
## [11] "n_neg_preg_test"                     
## [12] "day_from_end_first_pos_preg_test"    
## [13] "day_last_pos_preg_test"              
## [14] "day_last_preg_test"                  
## [15] "n_tot_sex"                           
## [16] "n_prot_sex"                          
## [17] "n_unprot_sex"                        
## [18] "n_withdrawal"                        
## [19] "n_insemination"                      
## [20] "n_BBT"                               
## [21] "day_first_pos_preg_test"             
## [22] "n_days_obs_after_first_pos_preg_test"
## [23] "last_preg_test"                      
## [24] "preg_test_class"                     
## [25] "preg_type"                           
## [26] "birth_year"                          
## [27] "current_age"                         
## [28] "reprod_obj_app"                      
## [29] "preg_outcome_based_on_duration"      
## [30] "cycle_length_next_cycle"             
## [31] "preg_outcome_cat"                    
## [32] "preg_outcome"                        
## [33] "cycle_nb_from_next_preg"             
## [34] "pregnancy_id"                        
## [35] "reprod_obj_score_counting"
# Copy each user's `first_cycle_preg` onto all of their cycles.
cycles$cycle_nb_first_pos_preg_test <- users$first_cycle_preg[
  match(cycles$user_id, users$user_id)
]

# Flag the cycles numbered from (first_cycle_preg - 4) to (first_cycle_preg - 1).
cycles$cycles_before_preg_4 <-
  ((cycles$cycle_nb_first_pos_preg_test - 4) <= cycles$cycle_nb) &
  ((cycles$cycle_nb_first_pos_preg_test - 1) >= cycles$cycle_nb)

# Reset the flag for users whose `first_cycle_preg` is below 5.
cycles$cycles_before_preg_4[cycles$cycle_nb_first_pos_preg_test < 5] <- FALSE

# Per-user mean, median and standard deviation of cycle length over the
# flagged cycles (NA cycle lengths are ignored).
users_cycles_stats <- ddply(
  cycles[which(cycles$cycles_before_preg_4), ],
  .(user_id),
  .fun = summarize,
  avg_cycle_length_4 = mean(cycle_length, na.rm = TRUE),
  median_cycle_length_4 = median(cycle_length, na.rm = TRUE),
  sd_cycle_length_4 = sd(cycle_length, na.rm = TRUE)
)

head(users_cycles_stats)
##                                    user_id avg_cycle_length_4
## 1 00011775cedac39f02b5cf431b2b2b4df8cf1fa7              32.25
## 2 00016999bb7faafc7c29c14dc63c436d1e2ca280              25.75
## 3 00027fe63a06d113e3cfb608c21281315127dbe2              31.00
## 4 00033c627144e7585875d7c3fe3bbe9459fc3a3b              28.25
## 5 000416061bb0401fdf710c414968f48de041e2c1              34.25
## 6 0005161ad07f63eaa0c98e59537a83fe2a4c44eb              30.25
##   median_cycle_length_4 sd_cycle_length_4
## 1                  32.0         2.2173558
## 2                  25.5         0.9574271
## 3                  30.5         1.4142136
## 4                  28.0         1.2583057
## 5                  34.0         2.9860788
## 6                  31.0         3.8622101
# Copy into `users` every statistics column that `users` does not have yet,
# aligning rows on `user_id`; users without statistics get NA.
# setdiff() is used instead of `x[-which(...)]`, which would drop every name
# when no column is shared, and columns are addressed with `[[ ]]` instead of
# building code strings for eval(parse()).
column_names <- setdiff(colnames(users_cycles_stats), colnames(users))
m <- match(users$user_id, users_cycles_stats$user_id)
for (column in column_names) {
  users[[column]] <- users_cycles_stats[[column]][m]
  # NA fill kept disabled:
  # users[[column]][is.na(users[[column]])] <- 0
}


# Record the outcome of each user's first pregnancy: take the cycle whose
# number equals `cycle_nb_first_pos_preg_test` and keep its `preg_outcome`.
users_first_preg_outcome <- cycles[
  which(cycles$cycle_nb == cycles$cycle_nb_first_pos_preg_test),
  c("user_id", "preg_outcome")
]

users$first_preg_outcome <- users_first_preg_outcome$preg_outcome[
  match(users$user_id, users_first_preg_outcome$user_id)
]

# Collapse the outcome into "LB" (TB noBF, BF, PTB), "PL" (EPL, LPL) or NA for
# anything else.
users$first_preg_outcome_simple <- ifelse(
  users$first_preg_outcome %in% c("TB noBF", "BF", "PTB"), # "ExPTB",
  "LB",
  ifelse(users$first_preg_outcome %in% c("EPL", "LPL"), "PL", NA)
)

# and the time to first pregnancy test
# Start date of the cycle whose number equals `cycle_nb_first_pos_preg_test`,
# one row per user.
users_date_first_preg_test <- cycles[
  which(cycles$cycle_nb == cycles$cycle_nb_first_pos_preg_test),
  c("user_id", "start_date")
]

# Days elapsed between the user's `earliest_date` and that cycle start.
# difftime() with explicit units guarantees a value in days; subtracting
# date-time columns directly lets R choose the unit automatically.
users$time_to_first_pos_test_in_days <- as.numeric(
  difftime(
    users_date_first_preg_test$start_date[
      match(users$user_id, users_date_first_preg_test$user_id)
    ],
    users$earliest_date,
    units = "days"
  )
)

# Same duration expressed in 365-day years.
users$time_to_first_pos_test <- users$time_to_first_pos_test_in_days / 365


# Write the updated users table to the output folder, then copy that file to
# the temporary data folder under a step-specific name.
write_feather(
  users,
  path = paste0(IO$output_data, "users.feather")
)
file.copy(
  from = paste0(IO$output_data, "users.feather"),
  to = paste0(IO$tmp_data, "users_with_stats_4_cycles.feather"),
  overwrite = TRUE
)
## [1] TRUE

5.2 Is cycle length predictive of pregnancy outcomes?

5.2.1 stats on all previous cycles

# any_PL is TRUE when n_PL > 0, otherwise FALSE when n_LB > 0, otherwise NA.
# (presumably n_PL / n_LB count pregnancy losses / live births -- confirm)
users$any_PL <- ifelse(
  users$n_PL > 0,
  TRUE,
  ifelse(users$n_LB > 0, FALSE, NA)
)

# Keep only the users for whom the indicator is defined.
u <- users[which(!is.na(users$any_PL)), ]


# Logistic regression of any_PL on the mean, median and sd of cycle length
# before pregnancy, plus age.
glm_cl <- glm(
  any_PL ~ cycle_length_before_preg_avg +
    cycle_length_before_preg_median +
    cycle_length_before_preg_sd +
    age_now,
  family = "binomial",
  data = u
)
summary(glm_cl)
## 
## Call:
## glm(formula = any_PL ~ cycle_length_before_preg_avg + cycle_length_before_preg_median + 
##     cycle_length_before_preg_sd + age_now, family = "binomial", 
##     data = u)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.8483  -1.0944  -0.9351   1.2301   1.7434  
## 
## Coefficients:
##                                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                     -2.336e+00  1.262e-01 -18.515   <2e-16 ***
## cycle_length_before_preg_avg     9.169e-07  5.285e-04   0.002   0.9986    
## cycle_length_before_preg_median  2.693e-03  8.479e-04   3.175   0.0015 ** 
## cycle_length_before_preg_sd      3.255e-05  9.690e-05   0.336   0.7369    
## age_now                          6.076e-02  3.580e-03  16.973   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 20632  on 14977  degrees of freedom
## Residual deviance: 20320  on 14973  degrees of freedom
##   (31333 observations deleted due to missingness)
## AIC: 20330
## 
## Number of Fisher Scoring iterations: 5
# Logistic regression of any_PL on the median cycle length alone.
glm_cl <- glm(
  any_PL ~ cycle_length_before_preg_median,
  family = "binomial",
  data = u
)

summary(glm_cl)
## 
## Call:
## glm(formula = any_PL ~ cycle_length_before_preg_median, family = "binomial", 
##     data = u)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -2.983  -1.117  -1.116   1.240   1.246  
## 
## Coefficients:
##                                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                     -0.1601892  0.0118145 -13.559  < 2e-16 ***
## cycle_length_before_preg_median  0.0005209  0.0001856   2.806  0.00501 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 56867  on 41169  degrees of freedom
## Residual deviance: 56857  on 41168  degrees of freedom
##   (5141 observations deleted due to missingness)
## AIC: 56861
## 
## Number of Fisher Scoring iterations: 3
# Logistic regression of any_PL on the cycle length standard deviation alone.
glm_cl <- glm(
  any_PL ~ cycle_length_before_preg_sd,
  family = "binomial",
  data = u
)

summary(glm_cl)
## 
## Call:
## glm(formula = any_PL ~ cycle_length_before_preg_sd, family = "binomial", 
##     data = u)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.582  -1.101  -1.101   1.256   1.256  
## 
## Coefficients:
##                               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                 -1.822e-01  1.047e-02 -17.410   <2e-16 ***
## cycle_length_before_preg_sd  9.842e-06  1.117e-05   0.881    0.378    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 50799  on 36862  degrees of freedom
## Residual deviance: 50799  on 36861  degrees of freedom
##   (9448 observations deleted due to missingness)
## AIC: 50803
## 
## Number of Fisher Scoring iterations: 3
# Overlaid histograms (1-unit bins) of the average cycle length, by any_PL.
ggplot(u, aes(x = cycle_length_before_preg_avg, fill = any_PL)) +
  geom_histogram(
    col = NA,
    alpha = 0.5,
    position = "identity",
    binwidth = 1
  )
## Warning: Removed 5141 rows containing non-finite values (stat_bin).

# Overlaid histograms (1-unit bins) of the median cycle length, by any_PL.
ggplot(u, aes(x = cycle_length_before_preg_median, fill = any_PL)) +
  geom_histogram(
    col = NA,
    alpha = 0.5,
    position = "identity",
    binwidth = 1
  )
## Warning: Removed 5141 rows containing non-finite values (stat_bin).

# Overlaid kernel densities (bandwidth 2) of the median cycle length, by any_PL.
ggplot(u, aes(x = cycle_length_before_preg_median, fill = any_PL)) +
  geom_density(
    col = NA,
    alpha = 0.5,
    bw = 2
  )
## Warning: Removed 5141 rows containing non-finite values (stat_density).

# Overlaid histograms (1-unit bins) of the natural log of the cycle length
# standard deviation, by any_PL.
ggplot(u, aes(x = log(cycle_length_before_preg_sd), fill = any_PL)) +
  geom_histogram(
    col = NA,
    alpha = 0.5,
    position = "identity",
    binwidth = 1
  )
## Warning: Removed 9805 rows containing non-finite values (stat_bin).

5.2.2 stats on the 4 previous cycles

# Keep the users whose first pregnancy has a simplified outcome ("LB" or "PL")
# and encode "PL" as the binary response.
u <- users[which(!is.na(users$first_preg_outcome_simple)), ]
u$is_first_preg_a_PL <- (u$first_preg_outcome_simple == "PL")

# Logistic regression on the mean, median and sd of cycle length over the four
# cycles preceding the first pregnancy.
glm_cl <- glm(
  is_first_preg_a_PL ~ avg_cycle_length_4 +
    median_cycle_length_4 +
    sd_cycle_length_4,
  family = "binomial",
  data = u
)
summary(glm_cl)
## 
## Call:
## glm(formula = is_first_preg_a_PL ~ avg_cycle_length_4 + median_cycle_length_4 + 
##     sd_cycle_length_4, family = "binomial", data = u)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7805  -1.0143  -0.9978   1.3456   1.6253  
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           -0.075956   0.080433  -0.944 0.344998    
## avg_cycle_length_4    -0.029788   0.007109  -4.190 2.78e-05 ***
## median_cycle_length_4  0.017206   0.004775   3.603 0.000314 ***
## sd_cycle_length_4      0.016870   0.003564   4.733 2.21e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 34933  on 25887  degrees of freedom
## Residual deviance: 34876  on 25884  degrees of freedom
##   (15831 observations deleted due to missingness)
## AIC: 34884
## 
## Number of Fisher Scoring iterations: 4
# Logistic regression on the 4-cycle median cycle length alone.
glm_cl <- glm(
  is_first_preg_a_PL ~ median_cycle_length_4,
  family = "binomial",
  data = u
)

summary(glm_cl)
## 
## Call:
## glm(formula = is_first_preg_a_PL ~ median_cycle_length_4, family = "binomial", 
##     data = u)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.097  -1.017  -1.016   1.346   1.354  
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           -0.420955   0.044052  -9.556   <2e-16 ***
## median_cycle_length_4  0.001071   0.001363   0.785    0.432    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 34933  on 25887  degrees of freedom
## Residual deviance: 34932  on 25886  degrees of freedom
##   (15831 observations deleted due to missingness)
## AIC: 34936
## 
## Number of Fisher Scoring iterations: 4
# Logistic regression on the 4-cycle standard deviation of cycle length alone.
glm_cl <- glm(
  is_first_preg_a_PL ~ sd_cycle_length_4,
  family = "binomial",
  data = u
)

summary(glm_cl)
## 
## Call:
## glm(formula = is_first_preg_a_PL ~ sd_cycle_length_4, family = "binomial", 
##     data = u)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.746  -1.012  -1.010   1.352   1.355  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       -0.4093681  0.0131568  -31.11  < 2e-16 ***
## sd_cycle_length_4  0.0018622  0.0003063    6.08  1.2e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 34933  on 25887  degrees of freedom
## Residual deviance: 34895  on 25886  degrees of freedom
##   (15831 observations deleted due to missingness)
## AIC: 34899
## 
## Number of Fisher Scoring iterations: 4
# Logistic regression on the 4-cycle standard deviation and median together.
glm_cl <- glm(
  is_first_preg_a_PL ~ sd_cycle_length_4 + median_cycle_length_4,
  family = "binomial",
  data = u
)

summary(glm_cl)
## 
## Call:
## glm(formula = is_first_preg_a_PL ~ sd_cycle_length_4 + median_cycle_length_4, 
##     family = "binomial", data = u)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.801  -1.013  -1.010   1.351   1.434  
## 
## Coefficients:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           -0.3531764  0.0456549  -7.736 1.03e-14 ***
## sd_cycle_length_4      0.0020000  0.0003259   6.137 8.39e-10 ***
## median_cycle_length_4 -0.0018667  0.0014528  -1.285    0.199    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 34933  on 25887  degrees of freedom
## Residual deviance: 34893  on 25885  degrees of freedom
##   (15831 observations deleted due to missingness)
## AIC: 34899
## 
## Number of Fisher Scoring iterations: 4
# Overlaid histograms (1-unit bins, x restricted to 0-100) of the 4-cycle
# average cycle length, by first-pregnancy outcome.
ggplot(u, aes(x = avg_cycle_length_4, fill = is_first_preg_a_PL)) +
  geom_histogram(
    col = NA,
    alpha = 0.5,
    position = "identity",
    binwidth = 1
  ) +
  xlim(0, 100)
## Warning: Removed 16347 rows containing non-finite values (stat_bin).
## Warning: Removed 4 rows containing missing values (geom_bar).

# Overlaid histograms (1-unit bins, x restricted to 0-100) of the 4-cycle
# median cycle length, by first-pregnancy outcome.
ggplot(u, aes(x = median_cycle_length_4, fill = is_first_preg_a_PL)) +
  geom_histogram(
    col = NA,
    alpha = 0.5,
    position = "identity",
    binwidth = 1
  ) +
  xlim(0, 100)
## Warning: Removed 15906 rows containing non-finite values (stat_bin).

## Warning: Removed 4 rows containing missing values (geom_bar).

# Overlaid histograms (0.1-unit bins) of log10 of the 4-cycle standard
# deviation of cycle length, by first-pregnancy outcome.
ggplot(u, aes(x = log10(sd_cycle_length_4), fill = is_first_preg_a_PL)) +
  geom_histogram(
    col = NA,
    alpha = 0.5,
    position = "identity",
    binwidth = 0.1
  )
## Warning: Removed 15945 rows containing non-finite values (stat_bin).

# Overlaid histograms (1-unit bins, x restricted to 0-50) of the 4-cycle
# standard deviation of cycle length, by first-pregnancy outcome.
ggplot(u, aes(x = sd_cycle_length_4, fill = is_first_preg_a_PL)) +
  geom_histogram(
    col = NA,
    alpha = 0.5,
    position = "identity",
    binwidth = 1
  ) +
  xlim(0, 50)
## Warning: Removed 16809 rows containing non-finite values (stat_bin).

## Warning: Removed 4 rows containing missing values (geom_bar).

6 Examples of user tracking history with different pregnancy outcomes

Loading the users, cycles and pregnancies tables (the days tables are loaded per batch further below).

# Reload the users, cycles and pregnancies tables from the output data folder.
users <- read_feather(paste0(IO$output_data, "users.feather"))
cycles <- read_feather(paste0(IO$output_data, "cycles.feather"))
pregnancies <- read_feather(paste0(IO$output_data, "pregnancies.feather"))

6.1 Users with pregnancy loss

# Row indices of users with exactly one pregnancy that is a PL, user_type 1,
# 15 to 20 cycles and 300 to 400 observed days.
j_PL <- which(
  (users$n_PL == 1) &
    (users$n_preg == 1) &
    (users$user_type == 1) &
    (users$n_cycles %in% 15:20) &
    (users$n_days_obs %in% 300:400)
)

6.2 Users with LB

# Row indices of users with exactly one pregnancy that is an LB, user_type 1,
# 15 to 20 cycles and 300 to 400 observed days.
j_LB <- which(
  (users$n_LB == 1) &
    (users$n_preg == 1) &
    (users$user_type == 1) &
    (users$n_cycles %in% 15:20) &
    (users$n_days_obs %in% 300:400)
)

6.3 Selecting the users

# Pool the candidate users and keep the (at most) 10 batches that contain the
# most of them.
j <- unique(c(j_LB, j_PL))
batches <- head(
  names(sort(table(users$batch[j]), decreasing = TRUE)),
  10
)

# Read each selected batch of days sequentially, keep only the rows of the
# candidate users, and stack the results.
tic()
days <- foreach(b = batches, .combine = rbind, .packages = "feather") %do% {
  cat(b, "\n")
  days <- read_feather(
    path = paste0(IO$output_data, "Days/days_", b, ".feather")
  )
  k <- which(days$user_id %in% users$user_id[j])
  days <- days[k, ]
  return(days)
}
## 58 
## 14 
## 32 
## 63 
## 10 
## 17 
## 23 
## 25 
## 33 
## 4
# Report the time elapsed since the matching tic() call.
toc()
## 30.127 sec elapsed
# Save the days of the candidate users, then print each user id and plot that
# user's tracking history from cycle 1 onwards.
write_feather(
  days,
  path = paste0(IO$tmp_data, "days_selected_users_examples_1.feather")
)
for (user in unique(days$user_id)) {
  cat("\n", user, "\n")
  d <- days[which(days$user_id == user & days$cycle_nb >= 1), ]
  plot.tracking.history(d = d, show_tests = TRUE, average_temp = FALSE)
}
# Hand-picked example users for the LB group.
selected_users_LB <- c(
  "5b8a94bc60f1aad5ae030be0dddfbaf7783d99f5",
  "3232d37bc6999d32faffca55ee3c67d3832ca8e1",
  "d4a285083dac365f5ef76992bb14a885f69da1a6",
  "f5e9deb6f71eaedb7aeb91b4637aaf80d910ee75",
  "8f26d4876c83fde806ad847334b9c8dfbc48f8f6",
  "5fa514a52a3863f33fc37579fac513911016ead0",
  "aab98dc98261e99772693ffa81cafb6839e0316e"
)

# Hand-picked example users for the PL group.
selected_users_PL <- c(
  "fbf10d778485d325bddffad0c24010e9fbcbaa32",
  "761e792a2309f4873d07ec95f6f595eb18d64621",
  "a1862179ab04585bd5d26a325152e2ac377a6ea2",
  "afdc30d1a29d04fd47ecd67cacf1bd22148ffdf8",
  "6b99ee04c897ffe7916527b020fd031f99518ca7"
)

# One additional example user kept outside the two groups.
others <- "83299899d84f008adad561c75cc7c3cf82106f8f"

user_ids <- c(selected_users_LB, selected_users_PL, others)

# Check that every selected user is present in the loaded days table.
all(user_ids %in% days$user_id)
## [1] TRUE

6.4 Selected users

# Print each selected user id and plot that user's tracking history from
# cycle 1 onwards.
for (user in user_ids) {
  cat("\n", user, "\n")
  d <- days[which(days$user_id == user & days$cycle_nb >= 1), ]
  plot.tracking.history(d = d, show_tests = TRUE, average_temp = FALSE)
}
## 
##  5b8a94bc60f1aad5ae030be0dddfbaf7783d99f5

## 
##  3232d37bc6999d32faffca55ee3c67d3832ca8e1

## 
##  d4a285083dac365f5ef76992bb14a885f69da1a6

## 
##  f5e9deb6f71eaedb7aeb91b4637aaf80d910ee75

## 
##  8f26d4876c83fde806ad847334b9c8dfbc48f8f6

## 
##  5fa514a52a3863f33fc37579fac513911016ead0

## 
##  aab98dc98261e99772693ffa81cafb6839e0316e

## 
##  fbf10d778485d325bddffad0c24010e9fbcbaa32

## 
##  761e792a2309f4873d07ec95f6f595eb18d64621

## 
##  a1862179ab04585bd5d26a325152e2ac377a6ea2

## 
##  afdc30d1a29d04fd47ecd67cacf1bd22148ffdf8

## 
##  6b99ee04c897ffe7916527b020fd031f99518ca7

## 
##  83299899d84f008adad561c75cc7c3cf82106f8f

# Final short list of example users; keep only their days and save them as an
# .Rdata file in the temporary data folder.
user_ids <- c(
  "5b8a94bc60f1aad5ae030be0dddfbaf7783d99f5",
  "3232d37bc6999d32faffca55ee3c67d3832ca8e1",
  "761e792a2309f4873d07ec95f6f595eb18d64621",
  "d4a285083dac365f5ef76992bb14a885f69da1a6"
)

days <- days[which(days$user_id %in% user_ids), ]
save(
  days,
  file = paste0(IO$tmp_data, "days_selected_users.Rdata")
)